diff options
author | Linus Torvalds | 2022-05-23 13:56:39 -0700 |
---|---|---|
committer | Linus Torvalds | 2022-05-23 13:56:39 -0700 |
commit | 115cd47132d71bd7e4aa1093e15d861a59e73a94 (patch) | |
tree | 42e457126de728c9328e4b9b09b5ca4852a590de | |
parent | f6792c877a1cacc3b3eea7cb5b45857b3c484c51 (diff) | |
parent | 2aaf516084184e4e6f80da01b2b3ed882fd20a79 (diff) |
Merge tag 'for-5.19/block-2022-05-22' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"Here are the core block changes for 5.19. This contains:
- blk-throttle accounting fix (Laibin)
- Series removing redundant assignments (Michal)
- Expose bio cache via the bio_set, so that DM can use it (Mike)
- Finish off the bio allocation interface cleanups by dealing with
the weirdest member of the family. bio_kmalloc combines a kmalloc
for the bio and bio_vecs with a hidden bio_init call and magic
cleanup semantics (Christoph)
- Clean up the block layer API so that APIs consumed by file systems
are (almost) only struct block_device based, so that file systems
don't have to poke into block layer internals like the
request_queue (Christoph)
- Clean up the blk_execute_rq* API (Christoph)
- Clean up various lose end in the blk-cgroup code to make it easier
to follow in preparation of reworking the blkcg assignment for bios
(Christoph)
- Fix use-after-free issues in BFQ when processes with merged queues
get moved to different cgroups (Jan)
- BFQ fixes (Jan)
- Various fixes and cleanups (Bart, Chengming, Fanjun, Julia, Ming,
Wolfgang, me)"
* tag 'for-5.19/block-2022-05-22' of git://git.kernel.dk/linux-block: (83 commits)
blk-mq: fix typo in comment
bfq: Remove bfq_requeue_request_body()
bfq: Remove superfluous conversion from RQ_BIC()
bfq: Allow current waker to defend against a tentative one
bfq: Relax waker detection for shared queues
blk-cgroup: delete rcu_read_lock_held() WARN_ON_ONCE()
blk-throttle: Set BIO_THROTTLED when bio has been throttled
blk-cgroup: Remove unnecessary rcu_read_lock/unlock()
blk-cgroup: always terminate io.stat lines
block, bfq: make bfq_has_work() more accurate
block, bfq: protect 'bfqd->queued' by 'bfqd->lock'
block: cleanup the VM accounting in submit_bio
block: Fix the bio.bi_opf comment
block: reorder the REQ_ flags
blk-iocost: combine local_stat and desc_stat to stat
block: improve the error message from bio_check_eod
block: allow passing a NULL bdev to bio_alloc_clone/bio_init_clone
block: remove superfluous calls to blkcg_bio_issue_init
kthread: unexport kthread_blkcg
blk-cgroup: cleanup blkcg_maybe_throttle_current
...
127 files changed, 1248 insertions, 1506 deletions
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index b03269faef71..085ffdf98e57 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -483,7 +483,6 @@ static void ubd_handler(void) if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { blk_queue_max_discard_sectors(io_req->req->q, 0); blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); } blk_mq_end_request(io_req->req, io_req->error); kfree(io_req); @@ -803,7 +802,6 @@ static int ubd_open_dev(struct ubd *ubd_dev) ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE; blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue); } blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); return 0; diff --git a/block/Makefile b/block/Makefile index 3950ecbc5c26..4e01bb71ad6e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o +obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o diff --git a/block/badblocks.c b/block/badblocks.c index d39056630d9c..3afb550c0f7b 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors, s >>= bb->shift; target += (1<<bb->shift) - 1; target >>= bb->shift; - sectors = target - s; } /* 'target' is now the first block after the bad range */ @@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) s += (1<<bb->shift) - 1; s >>= bb->shift; target >>= bb->shift; - sectors = target - s; } write_seqlock_irq(&bb->lock); diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 420eda2589c0..09574af83566 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; + bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg, entity->sched_data = &parent->sched_data; } -static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd, - struct blkcg *blkcg) +static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { - struct blkcg_gq *blkg; - - blkg = blkg_lookup(blkcg, bfqd->queue); - if (likely(blkg)) - return blkg_to_bfqg(blkg); - return NULL; -} - -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg) -{ - struct bfq_group *bfqg, *parent; + struct bfq_group *parent; struct bfq_entity *entity; - bfqg = bfq_lookup_bfqg(bfqd, blkcg); - - if (unlikely(!bfqg)) - return NULL; - /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet @@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, bfq_group_set_parent(curr_bfqg, parent); } } +} - return bfqg; +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + struct bfq_group *bfqg; + + while (blkg) { + bfqg = blkg_to_bfqg(blkg); + if (bfqg->online) { + bio_associate_blkg_from_css(bio, &blkg->blkcg->css); + return bfqg; + } + blkg = blkg->parent; + } + bio_associate_blkg_from_css(bio, + &bfqg_to_blkg(bfqd->root_group)->blkcg->css); + return bfqd->root_group; } /** @@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) - * - * NOTE: an alternative approach might have been to store the current - * cgroup in bfqq and getting a reference to it, reducing the lookup - * time here, at the price of slightly more complex code. */ -static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - struct bfq_io_cq *bic, - struct blkcg *blkcg) +static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_group *bfqg) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); - struct bfq_group *bfqg; struct bfq_entity *entity; - bfqg = bfq_find_set_group(bfqd, blkcg); - - if (unlikely(!bfqg)) - bfqg = bfqd->root_group; - if (async_bfqq) { entity = &async_bfqq->entity; @@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, } if (sync_bfqq) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + } else { + struct bfq_queue *bfqq; + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != + &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is + * not valid anymore. We cannot easily just + * cancel the merge (by clearing new_bfqq) as + * there may be other processes using this + * queue and holding refs to all queues below + * sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from + * bfqq now so that we cannot merge bio to a + * request from the old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bfq_release_process_ref(bfqd, sync_bfqq); + bic_set_bfqq(bic, NULL, 1); + } + } } return bfqg; @@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); - struct bfq_group *bfqg = NULL; + struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; - rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) - goto out; + return; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); + /* + * New cgroup for this process. Make sure it is linked to bfq internal + * cgroup hierarchy. + */ + bfq_link_bfqg(bfqd, bfqg); + __bfq_bic_change_cgroup(bfqd, bic, bfqg); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following @@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) */ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; -out: - rcu_read_unlock(); } /** @@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); + bfqg->online = false; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) bfq_end_wr_async_queues(bfqd, bfqd->root_group); } -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg) +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 1f62dbdc521f..0d46cb728bbf 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600; */ static const unsigned long bfq_late_stable_merging = 600; -#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) +#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0])) #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) @@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q) */ void bfq_schedule_dispatch(struct bfq_data *bfqd) { + lockdep_assert_held(&bfqd->lock); + if (bfqd->queued != 0) { bfq_log(bfqd, "schedule dispatch"); blk_mq_run_hw_queues(bfqd->queue, true); @@ -2133,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->last_completed_rq_bfqq || bfqd->last_completed_rq_bfqq == bfqq || bfq_bfqq_has_short_ttime(bfqq) || - bfqq->dispatched > 0 || - now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC || - bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq) + now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC) return; /* @@ -2208,9 +2208,13 @@ static void bfq_add_request(struct request *rq) bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); bfqq->queued[rq_is_sync(rq)]++; - bfqd->queued++; + /* + * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it + * may be read without holding the lock in bfq_has_work(). + */ + WRITE_ONCE(bfqd->queued, bfqd->queued + 1); - if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { + if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) { bfq_check_waker(bfqd, bfqq, now_ns); /* @@ -2400,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q, if (rq->queuelist.prev != &rq->queuelist) list_del_init(&rq->queuelist); bfqq->queued[sync]--; - bfqd->queued--; + /* + * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it + * may be read without holding the lock in bfq_has_work(). + */ + WRITE_ONCE(bfqd->queued, bfqd->queued - 1); elv_rb_del(&bfqq->sort_list, rq); elv_rqhash_del(q, rq); @@ -2463,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, spin_lock_irq(&bfqd->lock); - if (bic) + if (bic) { + /* + * Make sure cgroup info is uptodate for current process before + * considering the merge. + */ + bfq_bic_update_cgroup(bic, bio); + bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); - else + } else { bfqd->bio_bfqq = NULL; + } bfqd->bio_bic = bic; ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); @@ -2496,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, return ELEVATOR_NO_MERGE; } -static struct bfq_queue *bfq_init_rq(struct request *rq); - static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { @@ -2506,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(req) < blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { - struct bfq_queue *bfqq = bfq_init_rq(req); + struct bfq_queue *bfqq = RQ_BFQQ(req); struct bfq_data *bfqd; struct request *prev, *next_rq; @@ -2558,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct request *next) { - struct bfq_queue *bfqq = bfq_init_rq(rq), - *next_bfqq = bfq_init_rq(next); + struct bfq_queue *bfqq = RQ_BFQQ(rq), + *next_bfqq = RQ_BFQQ(next); if (!bfqq) goto remove; @@ -2764,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) if (process_refs == 0 || new_process_refs == 0) return NULL; + /* + * Make sure merged queues belong to the same parent. Parents could + * have changed since the time we decided the two queues are suitable + * for merging. + */ + if (new_bfqq->entity.parent != bfqq->entity.parent) + return NULL; + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", new_bfqq->pid); @@ -2901,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_queue *new_bfqq = bfq_setup_merge(bfqq, stable_merge_bfqq); - bic->stably_merged = true; - if (new_bfqq && new_bfqq->bic) - new_bfqq->bic->stably_merged = true; + if (new_bfqq) { + bic->stably_merged = true; + if (new_bfqq->bic) + new_bfqq->bic->stably_merged = + true; + } return new_bfqq; } else return NULL; @@ -5045,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; /* - * Avoiding lock: a race on bfqd->busy_queues should cause at + * Avoiding lock: a race on bfqd->queued should cause at * most a call to dispatch for nothing */ return !list_empty_careful(&bfqd->dispatch) || - bfq_tot_busy_queues(bfqd) > 0; + READ_ONCE(bfqd->queued); } static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) @@ -5310,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void bfq_put_cooperator(struct bfq_queue *bfqq) +void bfq_put_cooperator(struct bfq_queue *bfqq) { struct bfq_queue *__bfqq, *next; @@ -5716,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq; struct bfq_group *bfqg; - rcu_read_lock(); - - bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); - if (!bfqg) { - bfqq = &bfqd->oom_bfqq; - goto out; - } - + bfqg = bfq_bio_bfqg(bfqd, bio); if (!is_sync) { async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ioprio); @@ -5769,8 +5786,6 @@ out: if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn) bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic); - - rcu_read_unlock(); return bfqq; } @@ -6117,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q, unsigned int cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ +static struct bfq_queue *bfq_init_rq(struct request *rq); + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { @@ -6132,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bfqg_stats_update_legacy_io(q, rq); #endif spin_lock_irq(&bfqd->lock); + bfqq = bfq_init_rq(rq); if (blk_mq_sched_try_insert_merge(q, rq, &free)) { spin_unlock_irq(&bfqd->lock); blk_mq_free_requests(&free); return; } - spin_unlock_irq(&bfqd->lock); - trace_block_rq_insert(rq); - spin_lock_irq(&bfqd->lock); - bfqq = bfq_init_rq(rq); if (!bfqq || at_head) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); @@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_schedule_dispatch(bfqd); } -static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) -{ - bfqq_request_freed(bfqq); - bfq_put_queue(bfqq); -} - /* * The processes associated with bfqq may happen to generate their * cumulative I/O at a lower rate than the rate at which the device @@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); } - bfq_finish_requeue_request_body(bfqq); + bfqq_request_freed(bfqq); + bfq_put_queue(bfqq); + RQ_BIC(rq)->requests--; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfqq_request_allocated(bfqq); bfqq->ref++; + bic->requests++; bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", rq, bfqq, bfqq->ref); @@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_bfqq_expire(bfqd, bfqq, true, reason); schedule_dispatch: - spin_unlock_irqrestore(&bfqd->lock, flags); bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(&bfqd->lock, flags); } /* diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 3b83e3d1c2e5..ca8177d7bf7c 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -468,6 +468,7 @@ struct bfq_io_cq { struct bfq_queue *stable_merge_bfqq; bool stably_merged; /* non splittable if true */ + unsigned int requests; /* Number of requests this process has in flight */ }; /** @@ -928,6 +929,8 @@ struct bfq_group { /* reference counter (see comments in bfq_bic_update_cgroup) */ int ref; + /* Is bfq_group still online? */ + bool online; struct bfq_entity entity; struct bfq_sched_data sched_data; @@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); +void bfq_put_cooperator(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); @@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg); void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio); void bfq_end_wr_async(struct bfq_data *bfqd); -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg); +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio); struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); @@ -1100,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq); break; \ bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \ blk_add_cgroup_trace_msg((bfqd)->queue, \ - bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ + &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \ "%s " fmt, pid_str, ##args); \ } while (0) #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ blk_add_cgroup_trace_msg((bfqd)->queue, \ - bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \ + &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \ } while (0) #else /* CONFIG_BFQ_GROUP_IOSCHED */ diff --git a/block/bio.c b/block/bio.c index 4259125e16ab..a3893d80dccc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -224,24 +224,13 @@ EXPORT_SYMBOL(bio_uninit); static void bio_free(struct bio *bio) { struct bio_set *bs = bio->bi_pool; - void *p; - - bio_uninit(bio); + void *p = bio; - if (bs) { - bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs); + WARN_ON_ONCE(!bs); - /* - * If we have front padding, adjust the bio pointer before freeing - */ - p = bio; - p -= bs->front_pad; - - mempool_free(p, &bs->bio_pool); - } else { - /* Bio was allocated by bio_kmalloc() */ - kfree(bio); - } + bio_uninit(bio); + bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs); + mempool_free(p - bs->front_pad, &bs->bio_pool); } /* @@ -419,6 +408,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs) queue_work(bs->rescue_workqueue, &bs->rescue_work); } +static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, gfp_t gfp, + struct bio_set *bs) +{ + struct bio_alloc_cache *cache; + struct bio *bio; + + cache = per_cpu_ptr(bs->cache, get_cpu()); + if (!cache->free_list) { + put_cpu(); + return NULL; + } + bio = cache->free_list; + cache->free_list = bio->bi_next; + cache->nr--; + put_cpu(); + + bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf); + bio->bi_pool = bs; + return bio; +} + /** * bio_alloc_bioset - allocate a bio for I/O * @bdev: block device to allocate the bio for (can be %NULL) @@ -451,6 +462,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs) * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad * for per bio allocations. * + * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process + * context, not hard/soft IRQ. + * * Returns: Pointer to new bio on success, NULL on failure. */ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, @@ -465,6 +479,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0)) return NULL; + if (opf & REQ_ALLOC_CACHE) { + if (bs->cache && nr_vecs <= BIO_INLINE_VECS) { + bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf, + gfp_mask, bs); + if (bio) + return bio; + /* + * No cached bio available, bio returned below marked with + * REQ_ALLOC_CACHE to particpate in per-cpu alloc cache. + */ + } else { + opf &= ~REQ_ALLOC_CACHE; + } + } + /* * submit_bio_noacct() converts recursion to iteration; this means if * we're running beneath it, any bios we allocate and submit will not be @@ -528,28 +557,28 @@ err_free: EXPORT_SYMBOL(bio_alloc_bioset); /** - * bio_kmalloc - kmalloc a bio for I/O + * bio_kmalloc - kmalloc a bio + * @nr_vecs: number of bio_vecs to allocate * @gfp_mask: the GFP_* mask given to the slab allocator - * @nr_iovecs: number of iovecs to pre-allocate * - * Use kmalloc to allocate and initialize a bio. + * Use kmalloc to allocate a bio (including bvecs). The bio must be initialized + * using bio_init() before use. To free a bio returned from this function use + * kfree() after calling bio_uninit(). A bio returned from this function can + * be reused by calling bio_uninit() before calling bio_init() again. + * + * Note that unlike bio_alloc() or bio_alloc_bioset() allocations from this + * function are not backed by a mempool can can fail. Do not use this function + * for allocations in the file system I/O path. * * Returns: Pointer to new bio on success, NULL on failure. */ -struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs) +struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask) { struct bio *bio; - if (nr_iovecs > UIO_MAXIOV) - return NULL; - - bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask); - if (unlikely(!bio)) + if (nr_vecs > UIO_MAXIOV) return NULL; - bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs, - 0); - bio->bi_pool = NULL; - return bio; + return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask); } EXPORT_SYMBOL(bio_kmalloc); @@ -711,7 +740,7 @@ void bio_put(struct bio *bio) return; } - if (bio_flagged(bio, BIO_PERCPU_CACHE)) { + if (bio->bi_opf & REQ_ALLOC_CACHE) { struct bio_alloc_cache *cache; bio_uninit(bio); @@ -732,14 +761,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) bio_set_flag(bio, BIO_CLONED); if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); - if (bio->bi_bdev == bio_src->bi_bdev && - bio_flagged(bio_src, BIO_REMAPPED)) - bio_set_flag(bio, BIO_REMAPPED); bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_iter = bio_src->bi_iter; - bio_clone_blkg_association(bio, bio_src); - blkcg_bio_issue_init(bio); + if (bio->bi_bdev) { + if (bio->bi_bdev == bio_src->bi_bdev && + bio_flagged(bio_src, BIO_REMAPPED)) + bio_set_flag(bio, BIO_REMAPPED); + bio_clone_blkg_association(bio, bio_src); + } if (bio_crypt_clone(bio, bio_src, gfp) < 0) return -ENOMEM; @@ -1727,55 +1757,13 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src) flags |= BIOSET_NEED_BVECS; if (src->rescue_workqueue) flags |= BIOSET_NEED_RESCUER; + if (src->cache) + flags |= BIOSET_PERCPU_CACHE; return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags); } EXPORT_SYMBOL(bioset_init_from_src); -/** - * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb - * @kiocb: kiocb describing the IO - * @bdev: block device to allocate the bio for (can be %NULL) - * @nr_vecs: number of iovecs to pre-allocate - * @opf: operation and flags for bio - * @bs: bio_set to allocate from - * - * Description: - * Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only - * used to check if we should dip into the per-cpu bio_set allocation - * cache. The allocation uses GFP_KERNEL internally. On return, the - * bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio - * MUST be done from process context, not hard/soft IRQ. - * - */ -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, struct bio_set *bs) -{ - struct bio_alloc_cache *cache; - struct bio *bio; - - if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS) - return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs); - - cache = per_cpu_ptr(bs->cache, get_cpu()); - if (cache->free_list) { - bio = cache->free_list; - cache->free_list = bio->bi_next; - cache->nr--; - put_cpu(); - bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, - nr_vecs, opf); - bio->bi_pool = bs; - bio_set_flag(bio, BIO_PERCPU_CACHE); - return bio; - } - put_cpu(); - bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs); - bio_set_flag(bio, BIO_PERCPU_CACHE); - return bio; -} -EXPORT_SYMBOL_GPL(bio_alloc_kiocb); - static int __init init_bio(void) { int i; diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c new file mode 100644 index 000000000000..760a2e1878dd --- /dev/null +++ b/block/blk-cgroup-fc-appid.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "blk-cgroup.h" + +/** + * blkcg_set_fc_appid - set the fc_app_id field associted to blkcg + * @app_id: application identifier + * @cgrp_id: cgroup id + * @app_id_len: size of application identifier + */ +int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len) +{ + struct cgroup *cgrp; + struct cgroup_subsys_state *css; + struct blkcg *blkcg; + int ret = 0; + + if (app_id_len > FC_APPID_LEN) + return -EINVAL; + + cgrp = cgroup_get_from_id(cgrp_id); + if (!cgrp) + return -ENOENT; + css = cgroup_get_e_css(cgrp, &io_cgrp_subsys); + if (!css) { + ret = -ENOENT; + goto out_cgrp_put; + } + blkcg = css_to_blkcg(css); + /* + * There is a slight race condition on setting the appid. + * Worst case an I/O may not find the right id. + * This is no different from the I/O we let pass while obtaining + * the vmid from the fabric. + * Adding the overhead of a lock is not necessary. + */ + strlcpy(blkcg->fc_app_id, app_id, app_id_len); + css_put(css); +out_cgrp_put: + cgroup_put(cgrp); + return ret; +} +EXPORT_SYMBOL_GPL(blkcg_set_fc_appid); + +/** + * blkcg_get_fc_appid - get the fc app identifier associated with a bio + * @bio: target bio + * + * On success return the fc_app_id, on failure return NULL + */ +char *blkcg_get_fc_appid(struct bio *bio) +{ + if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0') + return NULL; + return bio->bi_blkg->blkcg->fc_app_id; +} +EXPORT_SYMBOL_GPL(blkcg_get_fc_appid); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8dfe62786cd5..40161a3f68d0 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq; #define BLKG_DESTROY_BATCH_SIZE 64 +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -156,6 +173,33 @@ static void blkg_async_bio_workfn(struct work_struct *work) } /** + * bio_blkcg_css - return the blkcg CSS associated with a bio + * @bio: target bio + * + * This returns the CSS for the blkcg associated with a bio, or %NULL if not + * associated. Callers are expected to either handle %NULL or know association + * has been done prior to calling this. + */ +struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio) +{ + if (!bio || !bio->bi_blkg) + return NULL; + return &bio->bi_blkg->blkcg->css; +} +EXPORT_SYMBOL_GPL(bio_blkcg_css); + +/** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + return css_to_blkcg(blkcg->css.parent); +} + +/** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with * @q: request_queue the new blkg is associated with @@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct blkcg_gq *blkg; int i, ret; - WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(&q->queue_lock); /* request_queue is dying, do not create/recreate a blkg */ @@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) { struct blkg_iostat_set *bis = &blkg->iostat; u64 rbytes, wbytes, rios, wios, dbytes, dios; - bool has_stats = false; const char *dname; unsigned seq; int i; @@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) } while (u64_stats_fetch_retry(&bis->sync, seq)); if (rbytes || wbytes || rios || wios) { - has_stats = true; seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu", rbytes, wbytes, rios, wios, dbytes, dios); } if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { - has_stats = true; seq_printf(s, " use_delay=%d delay_nsec=%llu", atomic_read(&blkg->use_delay), atomic64_read(&blkg->delay_nsec)); @@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) if (!blkg->pd[i] || !pol->pd_stat_fn) continue; - if (pol->pd_stat_fn(blkg->pd[i], s)) - has_stats = true; + pol->pd_stat_fn(blkg->pd[i], s); } - if (has_stats) - seq_printf(s, "\n"); + seq_puts(s, "\n"); } static int blkcg_print_stat(struct seq_file *sf, void *v) @@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = { { } /* terminate */ }; +#ifdef CONFIG_CGROUP_WRITEBACK +struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css) +{ + return &css_to_blkcg(css)->cgwb_list; +} +#endif + /* * blkcg destruction is a three-stage process. * @@ -1016,25 +1061,6 @@ static struct cftype blkcg_legacy_files[] = { */ /** - * blkcg_css_offline - cgroup css_offline callback - * @css: css of interest - * - * This function is called when @css is about to go away. Here the cgwbs are - * offlined first and only once writeback associated with the blkcg has - * finished do we start step 2 (see above). - */ -static void blkcg_css_offline(struct cgroup_subsys_state *css) -{ - struct blkcg *blkcg = css_to_blkcg(css); - - /* this prevents anyone from attaching or migrating to this blkcg */ - wb_blkcg_offline(blkcg); - - /* put the base online pin allowing step 2 to be triggered */ - blkcg_unpin_online(blkcg); -} - -/** * blkcg_destroy_blkgs - responsible for shooting down blkgs * @blkcg: blkcg of interest * @@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) * * This is the blkcg counterpart of ioc_release_fn(). */ -void blkcg_destroy_blkgs(struct blkcg *blkcg) +static void blkcg_destroy_blkgs(struct blkcg *blkcg) { might_sleep(); @@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) spin_unlock_irq(&blkcg->lock); } +/** + * blkcg_pin_online - pin online state + * @blkcg_css: blkcg of interest + * + * While pinned, a blkcg is kept online. This is primarily used to + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline + * while an associated cgwb is still active. + */ +void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css) +{ + refcount_inc(&css_to_blkcg(blkcg_css)->online_pin); +} + +/** + * blkcg_unpin_online - unpin online state + * @blkcg_css: blkcg of interest + * + * This is primarily used to impedance-match blkg and cgwb lifetimes so + * that blkg doesn't go offline while an associated cgwb is still active. + * When this count goes to zero, all active cgwbs have finished so the + * blkcg can continue destruction by calling blkcg_destroy_blkgs(). + */ +void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css) +{ + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + + do { + if (!refcount_dec_and_test(&blkcg->online_pin)) + break; + blkcg_destroy_blkgs(blkcg); + blkcg = blkcg_parent(blkcg); + } while (blkcg); +} + +/** + * blkcg_css_offline - cgroup css_offline callback + * @css: css of interest + * + * This function is called when @css is about to go away. Here the cgwbs are + * offlined first and only once writeback associated with the blkcg has + * finished do we start step 2 (see above). + */ +static void blkcg_css_offline(struct cgroup_subsys_state *css) +{ + /* this prevents anyone from attaching or migrating to this blkcg */ + wb_blkcg_offline(css); + + /* put the base online pin allowing step 2 to be triggered */ + blkcg_unpin_online(css); +} + static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); @@ -1163,8 +1240,7 @@ unlock: static int blkcg_css_online(struct cgroup_subsys_state *css) { - struct blkcg *blkcg = css_to_blkcg(css); - struct blkcg *parent = blkcg_parent(blkcg); + struct blkcg *parent = blkcg_parent(css_to_blkcg(css)); /* * blkcg_pin_online() is used to delay blkcg offline so that blkgs @@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) * parent so that offline always happens towards the root. */ if (parent) - blkcg_pin_online(parent); + blkcg_pin_online(css); return 0; } @@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q) preloaded = !radix_tree_preload(GFP_KERNEL); /* Make sure the root blkg exists. */ - rcu_read_lock(); + /* spin_lock_irq can serve as RCU read-side critical section. */ spin_lock_irq(&q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); if (IS_ERR(blkg)) goto err_unlock; q->root_blkg = blkg; spin_unlock_irq(&q->queue_lock); - rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); @@ -1234,7 +1309,6 @@ err_destroy_all: return ret; err_unlock: spin_unlock_irq(&q->queue_lock); - rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); return PTR_ERR(blkg); @@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) void blkcg_maybe_throttle_current(void) { struct request_queue *q = current->throttle_queue; - struct cgroup_subsys_state *css; struct blkcg *blkcg; struct blkcg_gq *blkg; bool use_memdelay = current->use_memdelay; @@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void) current->use_memdelay = false; rcu_read_lock(); - css = kthread_blkcg(); - if (css) - blkcg = css_to_blkcg(css); - else - blkcg = css_to_blkcg(task_css(current, io_cgrp_id)); - + blkcg = css_to_blkcg(blkcg_css()); if (!blkcg) goto out; blkg = blkg_lookup(blkcg, q); @@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio) rcu_read_lock(); if (bio->bi_blkg) - css = &bio_blkcg(bio)->css; + css = bio_blkcg_css(bio); else css = blkcg_css(); @@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio) put_cpu(); } +bool blk_cgroup_congested(void) +{ + struct cgroup_subsys_state *css; + bool ret = false; + + rcu_read_lock(); + for (css = blkcg_css(); css; css = css->parent) { + if (atomic_read(&css->cgroup->congestion_count)) { + ret = true; + break; + } + } + rcu_read_unlock(); + return ret; +} + static int __init blkcg_init(void) { blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 47e1e38390c9..d4de0a35e066 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,13 +15,101 @@ */ #include <linux/blk-cgroup.h> +#include <linux/cgroup.h> +#include <linux/kthread.h> #include <linux/blk-mq.h> +struct blkcg_gq; +struct blkg_policy_data; + + /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) #ifdef CONFIG_BLK_CGROUP +enum blkg_iostat_type { + BLKG_IOSTAT_READ, + BLKG_IOSTAT_WRITE, + BLKG_IOSTAT_DISCARD, + + BLKG_IOSTAT_NR, +}; + +struct blkg_iostat { + u64 bytes[BLKG_IOSTAT_NR]; + u64 ios[BLKG_IOSTAT_NR]; +}; + +struct blkg_iostat_set { + struct u64_stats_sync sync; + struct blkg_iostat cur; + struct blkg_iostat last; +}; + +/* association between a blk cgroup and a request queue */ +struct blkcg_gq { + /* Pointer to the associated request_queue */ + struct request_queue *q; + struct list_head q_node; + struct hlist_node blkcg_node; + struct blkcg *blkcg; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + + /* reference count */ + struct percpu_ref refcnt; + + /* is this blkg online? protected by both blkcg and q locks */ + bool online; + + struct blkg_iostat_set __percpu *iostat_cpu; + struct blkg_iostat_set iostat; + + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; + + spinlock_t async_bio_lock; + struct bio_list async_bios; + union { + struct work_struct async_bio_work; + struct work_struct free_work; + }; + + atomic_t use_delay; + atomic64_t delay_nsec; + atomic64_t delay_start; + u64 last_delay; + int last_use; + + struct rcu_head rcu_head; +}; + +struct blkcg { + struct cgroup_subsys_state css; + spinlock_t lock; + refcount_t online_pin; + + struct radix_tree_root blkg_tree; + struct blkcg_gq __rcu *blkg_hint; + struct hlist_head blkg_list; + + struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; + + struct list_head all_blkcgs_node; +#ifdef CONFIG_BLK_CGROUP_FC_APPID + char fc_app_id[FC_APPID_LEN]; +#endif +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; +#endif +}; + +static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct blkcg, css) : NULL; +} + /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track @@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, +typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, struct seq_file *s); struct blkcg_policy { @@ -123,52 +211,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, void blkg_conf_finish(struct blkg_conf_ctx *ctx); /** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} - -/** - * __bio_blkcg - internal, inconsistent version to get blkcg - * - * DO NOT USE. - * This function is inconsistent and consequently is dangerous to use. The - * first part of the function returns a blkcg where a reference is owned by the - * bio. This means it does not need to be rcu protected as it cannot go away - * with the bio owning a reference to it. However, the latter potentially gets - * it from task_css(). This can race against task migration and the cgroup - * dying. It is also semantically different as it must be called rcu protected - * and is susceptible to failure when trying to get a reference to it. - * Therefore, it is not ok to assume that *_get() will always succeed on the - * blkcg returned here. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - -/** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg * @return: true if this bio needs to be submitted with the root blkg context. * * In order to avoid priority inversions we sometimes need to issue a bio as if * it were attached to the root blkg, and then backcharge to the actual owning - * blkg. The idea is we do bio_blkcg() to look up the actual context for the - * bio and attach the appropriate blkg to the bio. Then we call this helper and - * if it is true run with the root blkg for that queue and then do any + * blkg. The idea is we do bio_blkcg_css() to look up the actual context for + * the bio and attach the appropriate blkg to the bio. Then we call this helper + * and if it is true run with the root blkg for that queue and then do any * backcharging to the originating cgroup once the io is complete. */ static inline bool bio_issue_as_root_blkg(struct bio *bio) @@ -457,7 +507,8 @@ struct blkcg_policy_data { struct blkcg_policy { }; -#ifdef CONFIG_BLOCK +struct blkcg { +}; static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) @@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } - static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, struct blkcg_policy *pol) { return NULL; } static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } @@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) -#endif /* CONFIG_BLOCK */ #endif /* CONFIG_BLK_CGROUP */ #endif /* _BLK_CGROUP_PRIVATE_H */ diff --git a/block/blk-core.c b/block/blk-core.c index bc0506772152..80fa73c419a9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -588,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio) (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" - "%pg: rw=%d, want=%llu, limit=%llu\n", - current->comm, - bio->bi_bdev, bio->bi_opf, - bio_end_sector(bio), maxsector); + "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n", + current->comm, bio->bi_bdev, bio->bi_opf, + bio->bi_iter.bi_sector, nr_sectors, maxsector); return -EIO; } return 0; @@ -816,11 +815,11 @@ void submit_bio_noacct(struct bio *bio) switch (bio_op(bio)) { case REQ_OP_DISCARD: - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(bdev)) goto not_supported; break; case REQ_OP_SECURE_ERASE: - if (!blk_queue_secure_erase(q)) + if (!bdev_max_secure_erase_sectors(bdev)) goto not_supported; break; case REQ_OP_ZONE_APPEND: @@ -889,19 +888,11 @@ void submit_bio(struct bio *bio) if (blkcg_punt_bio_submit(bio)) return; - /* - * If it's a regular read/write or a barrier with data attached, - * go through the normal accounting stuff before submission. - */ - if (bio_has_data(bio)) { - unsigned int count = bio_sectors(bio); - - if (op_is_write(bio_op(bio))) { - count_vm_events(PGPGOUT, count); - } else { - task_io_account_read(bio->bi_iter.bi_size); - count_vm_events(PGPGIN, count); - } + if (bio_op(bio) == REQ_OP_READ) { + task_io_account_read(bio->bi_iter.bi_size); + count_vm_events(PGPGIN, bio_sectors(bio)); + } else if (bio_op(bio) == REQ_OP_WRITE) { + count_vm_events(PGPGOUT, bio_sectors(bio)); } /* @@ -1018,21 +1009,22 @@ again: } } -static unsigned long __part_start_io_acct(struct block_device *part, - unsigned int sectors, unsigned int op, - unsigned long start_time) +unsigned long bdev_start_io_acct(struct block_device *bdev, + unsigned int sectors, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); part_stat_lock(); - update_io_ticks(part, start_time, false); - part_stat_inc(part, ios[sgrp]); - part_stat_add(part, sectors[sgrp], sectors); - part_stat_local_inc(part, in_flight[op_is_write(op)]); + update_io_ticks(bdev, start_time, false); + part_stat_inc(bdev, ios[sgrp]); + part_stat_add(bdev, sectors[sgrp], sectors); + part_stat_local_inc(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); return start_time; } +EXPORT_SYMBOL(bdev_start_io_acct); /** * bio_start_io_acct_time - start I/O accounting for bio based drivers @@ -1041,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part, */ void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) { - __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), start_time); + bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), start_time); } EXPORT_SYMBOL_GPL(bio_start_io_acct_time); @@ -1054,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time); */ unsigned long bio_start_io_acct(struct bio *bio) { - return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), jiffies); + return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); -unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, - unsigned int op) -{ - return __part_start_io_acct(disk->part0, sectors, op, jiffies); -} -EXPORT_SYMBOL(disk_start_io_acct); - -static void __part_end_io_acct(struct block_device *part, unsigned int op, - unsigned long start_time) +void bdev_end_io_acct(struct block_device *bdev, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); unsigned long duration = now - start_time; part_stat_lock(); - update_io_ticks(part, now, true); - part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); - part_stat_local_dec(part, in_flight[op_is_write(op)]); + update_io_ticks(bdev, now, true); + part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); + part_stat_local_dec(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); } +EXPORT_SYMBOL(bdev_end_io_acct); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, - struct block_device *orig_bdev) + struct block_device *orig_bdev) { - __part_end_io_acct(orig_bdev, bio_op(bio), start_time); + bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); } EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); -void disk_end_io_acct(struct gendisk *disk, unsigned int op, - unsigned long start_time) -{ - __part_end_io_acct(disk->part0, op, start_time); -} -EXPORT_SYMBOL(disk_end_io_acct); - /** * blk_lld_busy - Check if underlying low-level drivers of a device are busy * @q : the queue of the device being checked diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index 7c854584b52b..621abd1b0e4d 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -152,23 +152,25 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio) src_bio->bi_status = enc_bio->bi_status; - bio_put(enc_bio); + bio_uninit(enc_bio); + kfree(enc_bio); bio_endio(src_bio); } static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) { + unsigned int nr_segs = bio_segments(bio_src); struct bvec_iter iter; struct bio_vec bv; struct bio *bio; - bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src)); + bio = bio_kmalloc(nr_segs, GFP_NOIO); if (!bio) return NULL; - bio->bi_bdev = bio_src->bi_bdev; + bio_init(bio, bio_src->bi_bdev, bio->bi_inline_vecs, nr_segs, + bio_src->bi_opf); if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); - bio->bi_opf = bio_src->bi_opf; bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; @@ -177,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) bio->bi_io_vec[bio->bi_vcnt++] = bv; bio_clone_blkg_association(bio, bio_src); - blkcg_bio_issue_init(bio); return bio; } @@ -363,8 +364,8 @@ out_release_keyslot: blk_crypto_put_keyslot(slot); out_put_enc_bio: if (enc_bio) - bio_put(enc_bio); - + bio_uninit(enc_bio); + kfree(enc_bio); return ret; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9bd670999d0a..33a11ba971ea 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -533,8 +533,7 @@ struct ioc_gq { /* statistics */ struct iocg_pcpu_stat __percpu *pcpu_stat; - struct iocg_stat local_stat; - struct iocg_stat desc_stat; + struct iocg_stat stat; struct iocg_stat last_stat; u64 last_stat_abs_vusage; u64 usage_delta_us; @@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) return true; } else { if (iocg->indelay_since) { - iocg->local_stat.indelay_us += now->now - iocg->indelay_since; + iocg->stat.indelay_us += now->now - iocg->indelay_since; iocg->indelay_since = 0; } iocg->delay = 0; @@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay, /* if debt is paid in full, restore inuse */ if (!iocg->abs_vdebt) { - iocg->local_stat.indebt_us += now->now - iocg->indebt_since; + iocg->stat.indebt_us += now->now - iocg->indebt_since; iocg->indebt_since = 0; propagate_weights(iocg, iocg->active, iocg->last_inuse, @@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt, if (!waitqueue_active(&iocg->waitq)) { if (iocg->wait_since) { - iocg->local_stat.wait_us += now->now - iocg->wait_since; + iocg->stat.wait_us += now->now - iocg->wait_since; iocg->wait_since = 0; } return; @@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg, } } +/* propagate the deltas to the parent */ +static void iocg_flush_stat_upward(struct ioc_gq *iocg) +{ + if (iocg->level > 0) { + struct iocg_stat *parent_stat = + &iocg->ancestors[iocg->level - 1]->stat; + + parent_stat->usage_us += + iocg->stat.usage_us - iocg->last_stat.usage_us; + parent_stat->wait_us += + iocg->stat.wait_us - iocg->last_stat.wait_us; + parent_stat->indebt_us += + iocg->stat.indebt_us - iocg->last_stat.indebt_us; + parent_stat->indelay_us += + iocg->stat.indelay_us - iocg->last_stat.indelay_us; + } + + iocg->last_stat = iocg->stat; +} + /* collect per-cpu counters and propagate the deltas to the parent */ -static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now) +static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; - struct iocg_stat new_stat; u64 abs_vusage = 0; u64 vusage_delta; int cpu; @@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now) iocg->last_stat_abs_vusage = abs_vusage; iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate); - iocg->local_stat.usage_us += iocg->usage_delta_us; - - /* propagate upwards */ - new_stat.usage_us = - iocg->local_stat.usage_us + iocg->desc_stat.usage_us; - new_stat.wait_us = - iocg->local_stat.wait_us + iocg->desc_stat.wait_us; - new_stat.indebt_us = - iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us; - new_stat.indelay_us = - iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us; - - /* propagate the deltas to the parent */ - if (iocg->level > 0) { - struct iocg_stat *parent_stat = - &iocg->ancestors[iocg->level - 1]->desc_stat; + iocg->stat.usage_us += iocg->usage_delta_us; - parent_stat->usage_us += - new_stat.usage_us - iocg->last_stat.usage_us; - parent_stat->wait_us += - new_stat.wait_us - iocg->last_stat.wait_us; - parent_stat->indebt_us += - new_stat.indebt_us - iocg->last_stat.indebt_us; - parent_stat->indelay_us += - new_stat.indelay_us - iocg->last_stat.indelay_us; - } - - iocg->last_stat = new_stat; + iocg_flush_stat_upward(iocg); } /* get stat counters ready for reading on all active iocgs */ @@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now) /* flush leaves and build inner node walk list */ list_for_each_entry(iocg, target_iocgs, active_list) { - iocg_flush_stat_one(iocg, now); + iocg_flush_stat_leaf(iocg, now); iocg_build_inner_walk(iocg, &inner_walk); } /* keep flushing upwards by walking the inner list backwards */ list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) { - iocg_flush_stat_one(iocg, now); + iocg_flush_stat_upward(iocg); list_del_init(&iocg->walk_list); } } @@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) /* flush wait and indebt stat deltas */ if (iocg->wait_since) { - iocg->local_stat.wait_us += now->now - iocg->wait_since; + iocg->stat.wait_us += now->now - iocg->wait_since; iocg->wait_since = now->now; } if (iocg->indebt_since) { - iocg->local_stat.indebt_us += + iocg->stat.indebt_us += now->now - iocg->indebt_since; iocg->indebt_since = now->now; } if (iocg->indelay_since) { - iocg->local_stat.indelay_us += + iocg->stat.indelay_us += now->now - iocg->indelay_since; iocg->indelay_since = now->now; } @@ -3005,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd) kfree(iocg); } -static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) +static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct ioc_gq *iocg = pd_to_iocg(pd); struct ioc *ioc = iocg->ioc; if (!ioc->enabled) - return false; + return; if (iocg->level == 0) { unsigned vp10k = DIV64_U64_ROUND_CLOSEST( @@ -3027,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) iocg->last_stat.wait_us, iocg->last_stat.indebt_us, iocg->last_stat.indelay_us); - return true; } static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd, diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2f33932e72e3..5b676c7cf2b6 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -891,7 +891,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v) return 0; } -static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) +static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) { struct latency_stat stat; int cpu; @@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) (unsigned long long)stat.ps.missed, (unsigned long long)stat.ps.total, iolat->rq_depth.max_depth); - return true; } -static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) +static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct iolatency_grp *iolat = pd_to_lat(pd); unsigned long long avg_lat; unsigned long long cur_win; if (!blkcg_debug_stats) - return false; + return; if (iolat->ssd) return iolatency_ssd_stat(iolat, s); @@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) else seq_printf(s, " depth=%u avg_lat=%llu win=%llu", iolat->rq_depth.max_depth, avg_lat, cur_win); - return true; } static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, diff --git a/block/blk-lib.c b/block/blk-lib.c index 237d60d8b585..09b7e1200c0f 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -10,30 +10,44 @@ #include "blk.h" +static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector) +{ + unsigned int discard_granularity = bdev_discard_granularity(bdev); + sector_t granularity_aligned_sector; + + if (bdev_is_partition(bdev)) + sector += bdev->bd_start_sect; + + granularity_aligned_sector = + round_up(sector, discard_granularity >> SECTOR_SHIFT); + + /* + * Make sure subsequent bios start aligned to the discard granularity if + * it needs to be split. + */ + if (granularity_aligned_sector != sector) + return granularity_aligned_sector - sector; + + /* + * Align the bio size to the discard granularity to make splitting the bio + * at discard granularity boundaries easier in the driver if needed. + */ + return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT; +} + int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int flags, - struct bio **biop) + sector_t nr_sects, gfp_t gfp_mask, struct bio **biop) { - struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; - unsigned int op; - sector_t bs_mask, part_offset = 0; + sector_t bs_mask; if (bdev_read_only(bdev)) return -EPERM; - - if (flags & BLKDEV_DISCARD_SECURE) { - if (!blk_queue_secure_erase(q)) - return -EOPNOTSUPP; - op = REQ_OP_SECURE_ERASE; - } else { - if (!blk_queue_discard(q)) - return -EOPNOTSUPP; - op = REQ_OP_DISCARD; - } + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; /* In case the discard granularity isn't set by buggy device driver */ - if (WARN_ON_ONCE(!q->limits.discard_granularity)) { + if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) { char dev_name[BDEVNAME_SIZE]; bdevname(bdev, dev_name); @@ -48,38 +62,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!nr_sects) return -EINVAL; - /* In case the discard request is in a partition */ - if (bdev_is_partition(bdev)) - part_offset = bdev->bd_start_sect; - while (nr_sects) { - sector_t granularity_aligned_lba, req_sects; - sector_t sector_mapped = sector + part_offset; - - granularity_aligned_lba = round_up(sector_mapped, - q->limits.discard_granularity >> SECTOR_SHIFT); - - /* - * Check whether the discard bio starts at a discard_granularity - * aligned LBA, - * - If no: set (granularity_aligned_lba - sector_mapped) to - * bi_size of the first split bio, then the second bio will - * start at a discard_granularity aligned LBA on the device. - * - If yes: use bio_aligned_discard_max_sectors() as the max - * possible bi_size of the first split bio. Then when this bio - * is split in device drive, the split ones are very probably - * to be aligned to discard_granularity of the device's queue. - */ - if (granularity_aligned_lba == sector_mapped) - req_sects = min_t(sector_t, nr_sects, - bio_aligned_discard_max_sectors(q)); - else - req_sects = min_t(sector_t, nr_sects, - granularity_aligned_lba - sector_mapped); - - WARN_ON_ONCE((req_sects << 9) > UINT_MAX); + sector_t req_sects = + min(nr_sects, bio_discard_limit(bdev, sector)); - bio = blk_next_bio(bio, bdev, 0, op, gfp_mask); + bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_size = req_sects << 9; sector += req_sects; @@ -105,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard); * @sector: start sector * @nr_sects: number of sectors to discard * @gfp_mask: memory allocation flags (for bio_alloc) - * @flags: BLKDEV_DISCARD_* flags to control behaviour * * Description: * Issue a discard request for the sectors in question. */ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) + sector_t nr_sects, gfp_t gfp_mask) { struct bio *bio = NULL; struct blk_plug plug; int ret; blk_start_plug(&plug); - ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, - &bio); + ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); if (ret == -EOPNOTSUPP) @@ -316,3 +301,42 @@ retry: return ret; } EXPORT_SYMBOL(blkdev_issue_zeroout); + +int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp) +{ + sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; + unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev); + struct bio *bio = NULL; + struct blk_plug plug; + int ret = 0; + + if (max_sectors == 0) + return -EOPNOTSUPP; + if ((sector | nr_sects) & bs_mask) + return -EINVAL; + if (bdev_read_only(bdev)) + return -EPERM; + + blk_start_plug(&plug); + for (;;) { + unsigned int len = min_t(sector_t, nr_sects, max_sectors); + + bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp); + bio->bi_iter.bi_sector = sector; + bio->bi_iter.bi_size = len; + + sector += len << SECTOR_SHIFT; + nr_sects -= len << SECTOR_SHIFT; + if (!nr_sects) { + ret = submit_bio_wait(bio); + bio_put(bio); + break; + } + cond_resched(); + } + blk_finish_plug(&plug); + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_secure_erase); diff --git a/block/blk-map.c b/block/blk-map.c index c7f71d83eff1..df8b066cd548 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -152,10 +152,10 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); ret = -ENOMEM; - bio = bio_kmalloc(gfp_mask, nr_pages); + bio = bio_kmalloc(nr_pages, gfp_mask); if (!bio) goto out_bmd; - bio->bi_opf |= req_op(rq); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq)); if (map_data) { nr_pages = 1 << map_data->page_order; @@ -224,7 +224,8 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, cleanup: if (!map_data) bio_free_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); out_bmd: kfree(bmd); return ret; @@ -234,6 +235,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, gfp_t gfp_mask) { unsigned int max_sectors = queue_max_hw_sectors(rq->q); + unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); struct bio *bio; int ret; int j; @@ -241,10 +243,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, if (!iov_iter_count(iter)) return -EINVAL; - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS)); + bio = bio_kmalloc(nr_vecs, gfp_mask); if (!bio) return -ENOMEM; - bio->bi_opf |= req_op(rq); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); while (iov_iter_count(iter)) { struct page **pages; @@ -260,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - if (unlikely(offs & queue_dma_alignment(rq->q))) { - ret = -EINVAL; + if (unlikely(offs & queue_dma_alignment(rq->q))) j = 0; - } else { + else { for (j = 0; j < npages; j++) { struct page *page = pages[j]; unsigned int n = PAGE_SIZE - offs; @@ -303,7 +304,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, out_unmap: bio_release_pages(bio, false); - bio_put(bio); + bio_uninit(bio); + kfree(bio); return ret; } @@ -323,7 +325,8 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio) static void bio_map_kern_endio(struct bio *bio) { bio_invalidate_vmalloc_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); } /** @@ -348,9 +351,10 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data, int offset, i; struct bio *bio; - bio = bio_kmalloc(gfp_mask, nr_pages); + bio = bio_kmalloc(nr_pages, gfp_mask); if (!bio) return ERR_PTR(-ENOMEM); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); if (is_vmalloc) { flush_kernel_vmap_range(data, len); @@ -374,7 +378,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data, if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { /* we don't support partial mappings */ - bio_put(bio); + bio_uninit(bio); + kfree(bio); return ERR_PTR(-EINVAL); } @@ -390,7 +395,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data, static void bio_copy_kern_endio(struct bio *bio) { bio_free_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); } static void bio_copy_kern_endio_read(struct bio *bio) @@ -435,9 +441,10 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, return ERR_PTR(-EINVAL); nr_pages = end - start; - bio = bio_kmalloc(gfp_mask, nr_pages); + bio = bio_kmalloc(nr_pages, gfp_mask); if (!bio) return ERR_PTR(-ENOMEM); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); while (len) { struct page *page; @@ -471,7 +478,8 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, cleanup: bio_free_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); return ERR_PTR(-ENOMEM); } @@ -602,7 +610,8 @@ int blk_rq_unmap_user(struct bio *bio) next_bio = bio; bio = bio->bi_next; - bio_put(next_bio); + bio_uninit(next_bio); + kfree(next_bio); } return ret; @@ -648,8 +657,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, bio->bi_opf |= req_op(rq); ret = blk_rq_append_bio(rq, bio); - if (unlikely(ret)) - bio_put(bio); + if (unlikely(ret)) { + bio_uninit(bio); + kfree(bio); + } return ret; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index aa0349e9f083..7e4136a60e1c 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -113,10 +113,8 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(FAIL_IO), QUEUE_FLAG_NAME(NONROT), QUEUE_FLAG_NAME(IO_STAT), - QUEUE_FLAG_NAME(DISCARD), QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(ADD_RANDOM), - QUEUE_FLAG_NAME(SECERASE), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(DEAD), QUEUE_FLAG_NAME(INIT_DONE), diff --git a/block/blk-mq.c b/block/blk-mq.c index ed1869a305c4..ae116b755648 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1083,7 +1083,7 @@ bool blk_mq_complete_request_remote(struct request *rq) WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); /* - * For a polled request, always complete locallly, it's pointless + * For a polled request, always complete locally, it's pointless * to redirect the completion. */ if (rq->cmd_flags & REQ_POLLED) diff --git a/block/blk-settings.c b/block/blk-settings.c index b83df3d2eebc..6ccceb421ed2 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_zone_append_sectors = 0; lim->max_discard_sectors = 0; lim->max_hw_discard_sectors = 0; + lim->max_secure_erase_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; @@ -177,6 +178,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, EXPORT_SYMBOL(blk_queue_max_discard_sectors); /** + * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase + * @q: the request queue for the device + * @max_sectors: maximum number of sectors to secure_erase + **/ +void blk_queue_max_secure_erase_sectors(struct request_queue *q, + unsigned int max_sectors) +{ + q->limits.max_secure_erase_sectors = max_sectors; +} +EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors); + +/** * blk_queue_max_write_zeroes_sectors - set max sectors for a single * write zeroes * @q: the request queue for the device @@ -468,6 +481,40 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); +static int queue_limit_alignment_offset(struct queue_limits *lim, + sector_t sector) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) + << SECTOR_SHIFT; + + return (granularity + lim->alignment_offset - alignment) % granularity; +} + +static unsigned int queue_limit_discard_alignment(struct queue_limits *lim, + sector_t sector) +{ + unsigned int alignment, granularity, offset; + + if (!lim->max_discard_sectors) + return 0; + + /* Why are these in bytes, not sectors? */ + alignment = lim->discard_alignment >> SECTOR_SHIFT; + granularity = lim->discard_granularity >> SECTOR_SHIFT; + if (!granularity) + return 0; + + /* Offset of the partition start in 'granularity' sectors */ + offset = sector_div(sector, granularity); + + /* And why do we do this modulus *again* in blkdev_issue_discard()? */ + offset = (granularity + alignment - offset) % granularity; + + /* Turn it back into bytes, gaah */ + return offset << SECTOR_SHIFT; +} + static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs) { sectors = round_down(sectors, lbs >> SECTOR_SHIFT); @@ -627,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) % t->discard_granularity; } - + t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors, + b->max_secure_erase_sectors); t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); t->zoned = max(t->zoned, b->zoned); @@ -901,3 +949,27 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) } } EXPORT_SYMBOL_GPL(blk_queue_set_zoned); + +int bdev_alignment_offset(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q->limits.misaligned) + return -1; + if (bdev_is_partition(bdev)) + return queue_limit_alignment_offset(&q->limits, + bdev->bd_start_sect); + return q->limits.alignment_offset; +} +EXPORT_SYMBOL_GPL(bdev_alignment_offset); + +unsigned int bdev_discard_alignment(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (bdev_is_partition(bdev)) + return queue_limit_discard_alignment(&q->limits, + bdev->bd_start_sect); + return q->limits.discard_alignment; +} +EXPORT_SYMBOL_GPL(bdev_discard_alignment); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 469c483719be..139b2d7a99e2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) break; \ if ((__tg)) { \ blk_add_cgroup_trace_msg(__td->queue, \ - tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\ + &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\ } else { \ blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \ } \ @@ -2189,13 +2189,14 @@ again: } out_unlock: - spin_unlock_irq(&q->queue_lock); bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY; #endif + spin_unlock_irq(&q->queue_lock); + rcu_read_unlock(); return throttled; } diff --git a/block/blk.h b/block/blk.h index 8ccbc6e07636..434017701403 100644 --- a/block/blk.h +++ b/block/blk.h @@ -347,20 +347,6 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q) } /* - * The max bio size which is aligned to q->limits.discard_granularity. This - * is a hint to split large discard bio in generic block layer, then if device - * driver needs to split the discard bio into smaller ones, their bi_size can - * be very probably and easily aligned to discard_granularity of the device's - * queue. - */ -static inline unsigned int bio_aligned_discard_max_sectors( - struct request_queue *q) -{ - return round_down(UINT_MAX, q->limits.discard_granularity) >> - SECTOR_SHIFT; -} - -/* * Internal io_context interface */ struct io_cq *ioc_find_get_icq(struct request_queue *q); @@ -450,13 +436,6 @@ extern struct device_attribute dev_attr_events; extern struct device_attribute dev_attr_events_async; extern struct device_attribute dev_attr_events_poll_msecs; -static inline void bio_clear_polled(struct bio *bio) -{ - /* can't support alloc cache if we turn off polling */ - bio_clear_flag(bio, BIO_PERCPU_CACHE); - bio->bi_opf &= ~REQ_POLLED; -} - long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); diff --git a/block/bounce.c b/block/bounce.c index 467be46d0e65..8f7b6fe3b4db 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) goto err_put; bio_clone_blkg_association(bio, bio_src); - blkcg_bio_issue_init(bio); return bio; diff --git a/block/fops.c b/block/fops.c index 9f2ecec406b0..b9b83030e0df 100644 --- a/block/fops.c +++ b/block/fops.c @@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) #define DIO_INLINE_BIO_VECS 4 -static void blkdev_bio_end_io_simple(struct bio *bio) -{ - struct task_struct *waiter = bio->bi_private; - - WRITE_ONCE(bio->bi_private, NULL); - blk_wake_io_task(waiter); -} - static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, unsigned int nr_pages) { @@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); } bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; - bio.bi_private = current; - bio.bi_end_io = blkdev_bio_end_io_simple; bio.bi_ioprio = iocb->ki_ioprio; ret = bio_iov_iter_get_pages(&bio, iter); @@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iocb->ki_flags & IOCB_NOWAIT) bio.bi_opf |= REQ_NOWAIT; - if (iocb->ki_flags & IOCB_HIPRI) - bio_set_polled(&bio, iocb); - submit_bio(&bio); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!READ_ONCE(bio.bi_private)) - break; - if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) - blk_io_schedule(); - } - __set_current_state(TASK_RUNNING); + submit_bio_wait(&bio); bio_release_pages(&bio, should_dirty); if (unlikely(bio.bi_status)) @@ -197,8 +177,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool); - + if (iocb->ki_flags & IOCB_ALLOC_CACHE) + opf |= REQ_ALLOC_CACHE; + bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, + &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); atomic_set(&dio->ref, 1); /* @@ -320,7 +302,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool); + if (iocb->ki_flags & IOCB_ALLOC_CACHE) + opf |= REQ_ALLOC_CACHE; + bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, + &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); dio->flags = 0; dio->iocb = iocb; @@ -672,7 +657,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, - len >> SECTOR_SHIFT, GFP_KERNEL, 0); + len >> SECTOR_SHIFT, GFP_KERNEL); break; default: error = -EOPNOTSUPP; diff --git a/block/genhd.c b/block/genhd.c index b8b6759d670f..36532b931841 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1010,7 +1010,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); + return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0)); } static ssize_t disk_discard_alignment_show(struct device *dev, @@ -1019,7 +1019,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); + return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0)); } static ssize_t diskseq_show(struct device *dev, diff --git a/block/ioctl.c b/block/ioctl.c index f8703db99c73..46949f1b0dba 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -83,18 +83,17 @@ static int compat_blkpg_ioctl(struct block_device *bdev, #endif static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, - unsigned long arg, unsigned long flags) + unsigned long arg) { uint64_t range[2]; uint64_t start, len; - struct request_queue *q = bdev_get_queue(bdev); struct inode *inode = bdev->bd_inode; int err; if (!(mode & FMODE_WRITE)) return -EBADF; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (copy_from_user(range, (void __user *)arg, sizeof(range))) @@ -115,15 +114,43 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) goto fail; - - err = blkdev_issue_discard(bdev, start >> 9, len >> 9, - GFP_KERNEL, flags); - + err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); fail: filemap_invalidate_unlock(inode->i_mapping); return err; } +static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode, + void __user *argp) +{ + uint64_t start, len; + uint64_t range[2]; + int err; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + if (!bdev_max_secure_erase_sectors(bdev)) + return -EOPNOTSUPP; + if (copy_from_user(range, argp, sizeof(range))) + return -EFAULT; + + start = range[0]; + len = range[1]; + if ((start & 511) || (len & 511)) + return -EINVAL; + if (start + len > bdev_nr_bytes(bdev)) + return -EINVAL; + + filemap_invalidate_lock(bdev->bd_inode->i_mapping); + err = truncate_bdev_range(bdev, mode, start, start + len - 1); + if (!err) + err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, + GFP_KERNEL); + filemap_invalidate_unlock(bdev->bd_inode->i_mapping); + return err; +} + + static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, unsigned long arg) { @@ -451,10 +478,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, case BLKROSET: return blkdev_roset(bdev, mode, cmd, arg); case BLKDISCARD: - return blk_ioctl_discard(bdev, mode, arg, 0); + return blk_ioctl_discard(bdev, mode, arg); case BLKSECDISCARD: - return blk_ioctl_discard(bdev, mode, arg, - BLKDEV_DISCARD_SECURE); + return blk_ioctl_secure_erase(bdev, mode, argp); case BLKZEROOUT: return blk_ioctl_zeroout(bdev, mode, arg); case BLKGETDISKSEQ: @@ -489,7 +515,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, queue_max_sectors(bdev_get_queue(bdev))); return put_ushort(argp, max_sectors); case BLKROTATIONAL: - return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev))); + return put_ushort(argp, !bdev_nonrot(bdev)); case BLKRASET: case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c index 2c381c694c57..d2fc122d7426 100644 --- a/block/partitions/acorn.c +++ b/block/partitions/acorn.c @@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state) #ifdef CONFIG_ACORN_PARTITION_RISCIX case PARTITION_RISCIX_SCSI: case PARTITION_RISCIX_MFM: - slot = riscix_partition(state, start_sect, slot, + riscix_partition(state, start_sect, slot, nr_sects); break; #endif case PARTITION_LINUX: - slot = linux_partition(state, start_sect, slot, + linux_partition(state, start_sect, slot, nr_sects); break; } diff --git a/block/partitions/atari.c b/block/partitions/atari.c index da5994175416..9655c728262a 100644 --- a/block/partitions/atari.c +++ b/block/partitions/atari.c @@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state) /* accept only GEM,BGM,RAW,LNX,SWP partitions */ if (!((pi->flg & 1) && OK_id(pi->id))) continue; - part_fmt = 2; put_partition (state, slot, be32_to_cpu(pi->st), be32_to_cpu(pi->siz)); diff --git a/block/partitions/core.c b/block/partitions/core.c index 2ef8dfa1e5c8..70dec1c78521 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -200,21 +200,13 @@ static ssize_t part_ro_show(struct device *dev, static ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct block_device *bdev = dev_to_bdev(dev); - - return sprintf(buf, "%u\n", - queue_limit_alignment_offset(&bdev_get_queue(bdev)->limits, - bdev->bd_start_sect)); + return sprintf(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev))); } static ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct block_device *bdev = dev_to_bdev(dev); - - return sprintf(buf, "%u\n", - queue_limit_discard_alignment(&bdev_get_queue(bdev)->limits, - bdev->bd_start_sect)); + return sprintf(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev))); } static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index 27f6c7d9c776..38e58960ae03 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb) len = r_cols; } else { r_stripe = 0; - r_cols = 0; len = r_parent; } if (len < 0) @@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb) r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid); r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1); len = r_id2; - } else { - r_id1 = 0; - r_id2 = 0; + } else len = r_diskid; - } if (len < 0) return false; @@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb) r_id1 = ldm_relative (buffer, buflen, 0x44, r_name); r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1); len = r_id2; - } else { - r_id1 = 0; - r_id2 = 0; + } else len = r_name; - } if (len < 0) return false; @@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) return false; } len = r_index; - } else { - r_index = 0; + } else len = r_diskid; - } if (len < 0) { ldm_error("len %d < 0", len); return false; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4b0b25cc916e..912560f611c3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -903,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device) } } -/* communicated if (agreed_features & DRBD_FF_WSAME) */ -static void -assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, - struct request_queue *q) -{ - if (q) { - p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); - p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q)); - p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q)); - p->qlim->io_min = cpu_to_be32(queue_io_min(q)); - p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); - p->qlim->discard_enabled = blk_queue_discard(q); - p->qlim->write_same_capable = 0; - } else { - q = device->rq_queue; - p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); - p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q)); - p->qlim->alignment_offset = 0; - p->qlim->io_min = cpu_to_be32(queue_io_min(q)); - p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); - p->qlim->discard_enabled = 0; - p->qlim->write_same_capable = 0; - } -} - int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags) { struct drbd_device *device = peer_device->device; @@ -949,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu memset(p, 0, packet_size); if (get_ldev_if_state(device, D_NEGOTIATING)) { - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); + struct block_device *bdev = device->ldev->backing_bdev; + struct request_queue *q = bdev_get_queue(bdev); + d_size = drbd_get_max_capacity(device->ldev); rcu_read_lock(); u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; @@ -957,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu q_order_type = drbd_queue_order_type(device); max_bio_size = queue_max_hw_sectors(q) << 9; max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE); - assign_p_sizes_qlim(device, p, q); + p->qlim->physical_block_size = + cpu_to_be32(bdev_physical_block_size(bdev)); + p->qlim->logical_block_size = + cpu_to_be32(bdev_logical_block_size(bdev)); + p->qlim->alignment_offset = + cpu_to_be32(bdev_alignment_offset(bdev)); + p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev)); + p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev)); + p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev); put_ldev(device); } else { + struct request_queue *q = device->rq_queue; + + p->qlim->physical_block_size = + cpu_to_be32(queue_physical_block_size(q)); + p->qlim->logical_block_size = + cpu_to_be32(queue_logical_block_size(q)); + p->qlim->alignment_offset = 0; + p->qlim->io_min = cpu_to_be32(queue_io_min(q)); + p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); + p->qlim->discard_enabled = 0; + d_size = 0; u_size = 0; q_order_type = QUEUE_ORDERED_NONE; max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ - assign_p_sizes_qlim(device, p, NULL); } if (peer_device->connection->agreed_pro_version <= 94) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b7216c186ba4..a6280dcb3767 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1204,50 +1204,40 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) } static void decide_on_discard_support(struct drbd_device *device, - struct request_queue *q, - struct request_queue *b, - bool discard_zeroes_if_aligned) + struct drbd_backing_dev *bdev) { - /* q = drbd device queue (device->rq_queue) - * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue), - * or NULL if diskless - */ - struct drbd_connection *connection = first_peer_device(device)->connection; - bool can_do = b ? blk_queue_discard(b) : true; - - if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { - can_do = false; - drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); - } - if (can_do) { - /* We don't care for the granularity, really. - * Stacking limits below should fix it for the local - * device. Whether or not it is a suitable granularity - * on the remote device is not our problem, really. If - * you care, you need to use devices with similar - * topology on all peers. */ - blk_queue_discard_granularity(q, 512); - q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); - } else { - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); - blk_queue_discard_granularity(q, 0); - q->limits.max_discard_sectors = 0; - q->limits.max_write_zeroes_sectors = 0; - } -} + struct drbd_connection *connection = + first_peer_device(device)->connection; + struct request_queue *q = device->rq_queue; -static void fixup_discard_if_not_supported(struct request_queue *q) -{ - /* To avoid confusion, if this queue does not support discard, clear - * max_discard_sectors, which is what lsblk -D reports to the user. - * Older kernels got this wrong in "stack limits". - * */ - if (!blk_queue_discard(q)) { - blk_queue_max_discard_sectors(q, 0); - blk_queue_discard_granularity(q, 0); + if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) + goto not_supported; + + if (connection->cstate >= C_CONNECTED && + !(connection->agreed_features & DRBD_FF_TRIM)) { + drbd_info(connection, + "peer DRBD too old, does not support TRIM: disabling discards\n"); + goto not_supported; } + + /* + * We don't care for the granularity, really. + * + * Stacking limits below should fix it for the local device. Whether or + * not it is a suitable granularity on the remote device is not our + * problem, really. If you care, you need to use devices with similar + * topology on all peers. + */ + blk_queue_discard_granularity(q, 512); + q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); + q->limits.max_write_zeroes_sectors = + drbd_max_discard_sectors(connection); + return; + +not_supported: + blk_queue_discard_granularity(q, 0); + q->limits.max_discard_sectors = 0; + q->limits.max_write_zeroes_sectors = 0; } static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) @@ -1273,7 +1263,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi unsigned int max_segments = 0; struct request_queue *b = NULL; struct disk_conf *dc; - bool discard_zeroes_if_aligned = true; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; @@ -1282,7 +1271,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi rcu_read_lock(); dc = rcu_dereference(device->ldev->disk_conf); max_segments = dc->max_bio_bvecs; - discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned; rcu_read_unlock(); blk_set_stacking_limits(&q->limits); @@ -1292,13 +1280,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi /* This is the workaround for "bio would need to, but cannot, be split" */ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_SIZE-1); - decide_on_discard_support(device, q, b, discard_zeroes_if_aligned); + decide_on_discard_support(device, bdev); if (b) { blk_stack_limits(&q->limits, &b->limits, 0); disk_update_readahead(device->vdisk); } - fixup_discard_if_not_supported(q); fixup_write_zeroes(device, q); } @@ -1437,14 +1424,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf, struct drbd_backing_dev *nbc) { - struct request_queue * const q = nbc->backing_bdev->bd_disk->queue; + struct block_device *bdev = nbc->backing_bdev; if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; if (disk_conf->al_extents > drbd_al_extents_max(nbc)) disk_conf->al_extents = drbd_al_extents_max(nbc); - if (!blk_queue_discard(q)) { + if (!bdev_max_discard_sectors(bdev)) { if (disk_conf->rs_discard_granularity) { disk_conf->rs_discard_granularity = 0; /* disable feature */ drbd_info(device, "rs_discard_granularity feature disabled\n"); @@ -1453,16 +1440,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis if (disk_conf->rs_discard_granularity) { int orig_value = disk_conf->rs_discard_granularity; + sector_t discard_size = bdev_max_discard_sectors(bdev) << 9; + unsigned int discard_granularity = bdev_discard_granularity(bdev); int remainder; - if (q->limits.discard_granularity > disk_conf->rs_discard_granularity) - disk_conf->rs_discard_granularity = q->limits.discard_granularity; + if (discard_granularity > disk_conf->rs_discard_granularity) + disk_conf->rs_discard_granularity = discard_granularity; - remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity; + remainder = disk_conf->rs_discard_granularity % + discard_granularity; disk_conf->rs_discard_granularity += remainder; - if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9) - disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9; + if (disk_conf->rs_discard_granularity > discard_size) + disk_conf->rs_discard_granularity = discard_size; if (disk_conf->rs_discard_granularity != orig_value) drbd_info(device, "rs_discard_granularity changed to %d\n", diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 08da922f81d1..2957b0b68d60 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1511,7 +1511,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags) { struct block_device *bdev = device->ldev->backing_bdev; - struct request_queue *q = bdev_get_queue(bdev); sector_t tmp, nr; unsigned int max_discard_sectors, granularity; int alignment; @@ -1521,10 +1520,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u goto zero_out; /* Zero-sector (unknown) and one-sector granularities are the same. */ - granularity = max(q->limits.discard_granularity >> 9, 1U); + granularity = max(bdev_discard_granularity(bdev) >> 9, 1U); alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); + max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22)); max_discard_sectors -= max_discard_sectors % granularity; if (unlikely(!max_discard_sectors)) goto zero_out; @@ -1548,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u start = tmp; } while (nr_sectors >= max_discard_sectors) { - err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0); + err |= blkdev_issue_discard(bdev, start, max_discard_sectors, + GFP_NOIO); nr_sectors -= max_discard_sectors; start += max_discard_sectors; } @@ -1560,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u nr = nr_sectors; nr -= (unsigned int)nr % granularity; if (nr) { - err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); + err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO); nr_sectors -= nr; start += nr; } @@ -1575,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u static bool can_do_reliable_discards(struct drbd_device *device) { - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); struct disk_conf *dc; bool can_do; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(device->ldev->backing_bdev)) return false; rcu_read_lock(); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a58595f5ee2c..fabcf647306a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -314,15 +314,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, mode |= FALLOC_FL_KEEP_SIZE; - if (!blk_queue_discard(lo->lo_queue)) { - ret = -EOPNOTSUPP; - goto out; - } + if (!bdev_max_discard_sectors(lo->lo_device)) + return -EOPNOTSUPP; ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq)); if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP)) - ret = -EIO; - out: + return -EIO; return ret; } @@ -762,7 +759,7 @@ static void loop_config_discard(struct loop_device *lo) struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); max_discard_sectors = backingq->limits.max_write_zeroes_sectors; - granularity = backingq->limits.discard_granularity ?: + granularity = bdev_discard_granularity(I_BDEV(inode)) ?: queue_physical_block_size(backingq); /* @@ -787,12 +784,10 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = granularity; blk_queue_max_discard_sectors(q, max_discard_sectors); blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } else { q->limits.discard_granularity = 0; blk_queue_max_discard_sectors(q, 0); blk_queue_max_write_zeroes_sectors(q, 0); - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); } q->limits.discard_alignment = 0; } @@ -903,7 +898,7 @@ static void loop_update_rotational(struct loop_device *lo) /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */ if (file_bdev) - nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev)); + nonrot = bdev_nonrot(file_bdev); if (nonrot) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); @@ -1834,12 +1829,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->blkcg_css = NULL; cmd->memcg_css = NULL; #ifdef CONFIG_BLK_CGROUP - if (rq->bio && rq->bio->bi_blkg) { - cmd->blkcg_css = &bio_blkcg(rq->bio)->css; + if (rq->bio) { + cmd->blkcg_css = bio_blkcg_css(rq->bio); #ifdef CONFIG_MEMCG - cmd->memcg_css = - cgroup_get_e_css(cmd->blkcg_css->cgroup, - &memory_cgrp_subsys); + if (cmd->blkcg_css) { + cmd->memcg_css = + cgroup_get_e_css(cmd->blkcg_css->cgroup, + &memory_cgrp_subsys); + } #endif } #endif diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5a1f98494ddd..4729aef8c646 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1231,8 +1231,6 @@ static void nbd_parse_flags(struct nbd_device *nbd) set_disk_ro(nbd->disk, true); else set_disk_ro(nbd->disk, false); - if (config->flags & NBD_FLAG_SEND_TRIM) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (config->flags & NBD_FLAG_SEND_FLUSH) { if (config->flags & NBD_FLAG_SEND_FUA) blk_queue_write_cache(nbd->disk->queue, true, true); @@ -1319,8 +1317,7 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->tag_set.timeout = 0; nbd->disk->queue->limits.discard_granularity = 0; nbd->disk->queue->limits.discard_alignment = 0; - blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_max_discard_sectors(nbd->disk->queue, 0); mutex_unlock(&nbd->config_lock); nbd_put(nbd); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index c441a4972064..5cb4c92cdffe 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1767,7 +1767,6 @@ static void null_config_discard(struct nullb *nullb) nullb->q->limits.discard_granularity = nullb->dev->blocksize; nullb->q->limits.discard_alignment = nullb->dev->blocksize; blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); } static const struct block_device_operations null_bio_ops = { diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a3a43ab8582d..789093375344 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -522,7 +522,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames) goto no_pkt; pkt->frames = frames; - pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames); + pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL); if (!pkt->w_bio) goto no_bio; @@ -536,27 +536,21 @@ static struct packet_data *pkt_alloc_packet_data(int frames) bio_list_init(&pkt->orig_bios); for (i = 0; i < frames; i++) { - struct bio *bio = bio_kmalloc(GFP_KERNEL, 1); - if (!bio) + pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL); + if (!pkt->r_bios[i]) goto no_rd_bio; - - pkt->r_bios[i] = bio; } return pkt; no_rd_bio: - for (i = 0; i < frames; i++) { - struct bio *bio = pkt->r_bios[i]; - if (bio) - bio_put(bio); - } - + for (i = 0; i < frames; i++) + kfree(pkt->r_bios[i]); no_page: for (i = 0; i < frames / FRAMES_PER_PAGE; i++) if (pkt->pages[i]) __free_page(pkt->pages[i]); - bio_put(pkt->w_bio); + kfree(pkt->w_bio); no_bio: kfree(pkt); no_pkt: @@ -570,14 +564,11 @@ static void pkt_free_packet_data(struct packet_data *pkt) { int i; - for (i = 0; i < pkt->frames; i++) { - struct bio *bio = pkt->r_bios[i]; - if (bio) - bio_put(bio); - } + for (i = 0; i < pkt->frames; i++) + kfree(pkt->r_bios[i]); for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++) __free_page(pkt->pages[i]); - bio_put(pkt->w_bio); + kfree(pkt->w_bio); kfree(pkt); } @@ -951,6 +942,7 @@ static void pkt_end_io_read(struct bio *bio) if (bio->bi_status) atomic_inc(&pkt->io_errors); + bio_uninit(bio); if (atomic_dec_and_test(&pkt->io_wait)) { atomic_inc(&pkt->run_sm); wake_up(&pd->wqueue); @@ -968,6 +960,7 @@ static void pkt_end_io_packet_write(struct bio *bio) pd->stats.pkt_ended++; + bio_uninit(bio); pkt_bio_finished(pd); atomic_dec(&pkt->io_wait); atomic_inc(&pkt->run_sm); @@ -1022,7 +1015,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) continue; bio = pkt->r_bios[f]; - bio_reset(bio, pd->bdev, REQ_OP_READ); + bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ); bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); bio->bi_end_io = pkt_end_io_read; bio->bi_private = pkt; @@ -1235,7 +1228,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) { int f; - bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE); + bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames, + REQ_OP_WRITE); pkt->w_bio->bi_iter.bi_sector = pkt->sector; pkt->w_bio->bi_end_io = pkt_end_io_packet_write; pkt->w_bio->bi_private = pkt; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b844432bad20..2b21f717cce1 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4942,7 +4942,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_io_opt(q, rbd_dev->opts->alloc_size); if (rbd_dev->opts->trim) { - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.discard_granularity = rbd_dev->opts->alloc_size; blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index b66e8840b94b..d178be175ad9 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1364,11 +1364,9 @@ static void setup_request_queue(struct rnbd_clt_dev *dev) blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors); dev->queue->limits.discard_granularity = dev->discard_granularity; dev->queue->limits.discard_alignment = dev->discard_alignment; - if (dev->max_discard_sectors) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue); if (dev->secure_discard) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue); - + blk_queue_max_secure_erase_sectors(dev->queue, + dev->max_discard_sectors); blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); blk_queue_max_segments(dev->queue, dev->max_segments); diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h index 2c3df02b5e8e..d080a0de5922 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.h +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -44,16 +44,12 @@ static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev) static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev) { - return blk_queue_secure_erase(bdev_get_queue(dev->bdev)); + return bdev_max_secure_erase_sectors(dev->bdev); } static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev) { - if (!blk_queue_discard(bdev_get_queue(dev->bdev))) - return 0; - - return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev), - REQ_OP_DISCARD); + return bdev_max_discard_sectors(dev->bdev); } static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index f04df6294650..beaef43a67b9 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -533,7 +533,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, struct rnbd_srv_sess_dev *sess_dev) { struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev; - struct request_queue *q = bdev_get_queue(rnbd_dev->bdev); rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = @@ -558,9 +557,9 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, rsp->secure_discard = cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); rsp->cache_policy = 0; - if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + if (bdev_write_cache(rnbd_dev->bdev)) rsp->cache_policy |= RNBD_WRITEBACK; - if (blk_queue_fua(q)) + if (bdev_fua(rnbd_dev->bdev)) rsp->cache_policy |= RNBD_FUA; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a8bcf3f664af..6ccf15253dee 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -888,8 +888,6 @@ static int virtblk_probe(struct virtio_device *vdev) v = sg_elems; blk_queue_max_discard_segments(q, min(v, MAX_DISCARD_SEGMENTS)); - - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index de42458195bc..a97f2bf5b01b 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, int status = BLKIF_RSP_OKAY; struct xen_blkif *blkif = ring->blkif; struct block_device *bdev = blkif->vbd.bdev; - unsigned long secure; struct phys_req preq; xen_blkif_get(blkif); @@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, } ring->st_ds_req++; - secure = (blkif->vbd.discard_secure && - (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? - BLKDEV_DISCARD_SECURE : 0; + if (blkif->vbd.discard_secure && + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) + err = blkdev_issue_secure_erase(bdev, + req->u.discard.sector_number, + req->u.discard.nr_sectors, GFP_KERNEL); + else + err = blkdev_issue_discard(bdev, req->u.discard.sector_number, + req->u.discard.nr_sectors, GFP_KERNEL); - err = blkdev_issue_discard(bdev, req->u.discard.sector_number, - req->u.discard.nr_sectors, - GFP_KERNEL, secure); fail_response: if (err == -EOPNOTSUPP) { pr_debug("discard op failed, not supported\n"); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f09040435e2e..b21bffc9c50b 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -484,7 +484,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, { struct xen_vbd *vbd; struct block_device *bdev; - struct request_queue *q; vbd = &blkif->vbd; vbd->handle = handle; @@ -516,11 +515,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; - q = bdev_get_queue(bdev); - if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + if (bdev_write_cache(bdev)) vbd->flush_support = true; - - if (q && blk_queue_secure_erase(q)) + if (bdev_max_secure_erase_sectors(bdev)) vbd->discard_secure = true; vbd->feature_gnt_persistent = feature_persistent; @@ -583,7 +580,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1)) return; - if (blk_queue_discard(q)) { + if (bdev_max_discard_sectors(bdev)) { err = xenbus_printf(xbt, dev->nodename, "discard-granularity", "%u", q->limits.discard_granularity); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 003056d4f7f5..0f3f5238f7bc 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -944,13 +944,13 @@ static void blkif_set_queue_limits(struct blkfront_info *info) blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { - blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity ?: info->physical_sector_size; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); + blk_queue_max_secure_erase_sectors(rq, + get_capacity(gd)); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ @@ -1606,8 +1606,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); + blk_queue_max_discard_sectors(rq, 0); + blk_queue_max_secure_erase_sectors(rq, 0); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e9474b02012d..1b277308d3ec 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1675,9 +1675,10 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op); + start_time = bdev_start_io_acct(bdev->bd_disk->part0, + SECTORS_PER_PAGE, op, jiffies); ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); - disk_end_io_acct(bdev->bd_disk, op, start_time); + bdev_end_io_acct(bdev->bd_disk->part0, op, start_time); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -1952,7 +1953,6 @@ static int zram_add(void) blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); /* * zram_bio_discard() will clear all logical blocks if logical block diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 097577ae3c47..ce13c272c387 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -336,7 +336,7 @@ static int bch_allocator_thread(void *arg) mutex_unlock(&ca->set->bucket_lock); blkdev_issue_discard(ca->bdev, bucket_to_sector(ca->set, bucket), - ca->sb.bucket_size, GFP_KERNEL, 0); + ca->sb.bucket_size, GFP_KERNEL); mutex_lock(&ca->set->bucket_lock); } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 6230dfdd9286..7510d1c983a5 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -107,15 +107,16 @@ void bch_btree_verify(struct btree *b) void bch_data_verify(struct cached_dev *dc, struct bio *bio) { + unsigned int nr_segs = bio_segments(bio); struct bio *check; struct bio_vec bv, cbv; struct bvec_iter iter, citer = { 0 }; - check = bio_kmalloc(GFP_NOIO, bio_segments(bio)); + check = bio_kmalloc(nr_segs, GFP_NOIO); if (!check) return; - bio_set_dev(check, bio->bi_bdev); - check->bi_opf = REQ_OP_READ; + bio_init(check, bio->bi_bdev, check->bi_inline_vecs, nr_segs, + REQ_OP_READ); check->bi_iter.bi_sector = bio->bi_iter.bi_sector; check->bi_iter.bi_size = bio->bi_iter.bi_size; @@ -146,7 +147,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) bio_free_pages(check); out_put: - bio_put(check); + bio_uninit(check); + kfree(check); } #endif diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 320fcdfef48e..9c5dde73da88 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1005,7 +1005,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) bio_get(s->iop.bio); if (bio_op(bio) == REQ_OP_DISCARD && - !blk_queue_discard(bdev_get_queue(dc->bdev))) + !bdev_max_discard_sectors(dc->bdev)) goto insert_data; /* I/O request sent to backing device */ @@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio, bio->bi_private = ddip; if ((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(dc->bdev))) + !bdev_max_discard_sectors(dc->bdev)) bio->bi_end_io(bio); else submit_bio_noacct(bio); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index bf3de149d3c9..2f49e31142f6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -973,7 +973,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue); blk_queue_write_cache(q, true, true); @@ -2350,7 +2349,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, ca->bdev->bd_holder = ca; ca->sb_disk = sb_disk; - if (blk_queue_discard(bdev_get_queue(bdev))) + if (bdev_max_discard_sectors((bdev))) ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(ca); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index d1029d71ff3b..c6f677059214 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -1151,7 +1151,7 @@ STORE(__bch_cache) if (attr == &sysfs_discard) { bool v = strtoul_or_return(buf); - if (blk_queue_discard(bdev_get_queue(ca->bdev))) + if (bdev_max_discard_sectors(ca->bdev)) ca->discard = v; if (v != CACHE_DISCARD(&ca->sb)) { diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index e9cbc70d5a0e..5ffa1dcf84cf 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -611,7 +611,8 @@ static void bio_complete(struct bio *bio) { struct dm_buffer *b = bio->bi_private; blk_status_t status = bio->bi_status; - bio_put(bio); + bio_uninit(bio); + kfree(bio); b->end_io(b, status); } @@ -626,16 +627,14 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector, if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT)) vec_size += 2; - bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size); + bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN); if (!bio) { dmio: use_dmio(b, rw, sector, n_sectors, offset); return; } - + bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw); bio->bi_iter.bi_sector = sector; - bio_set_dev(bio, b->c->bdev); - bio_set_op_attrs(bio, rw, 0); bio->bi_end_io = bio_complete; bio->bi_private = b; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 780a61bc6cc0..28c5de8eca4a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3329,13 +3329,6 @@ static int cache_iterate_devices(struct dm_target *ti, return r; } -static bool origin_dev_supports_discard(struct block_device *origin_bdev) -{ - struct request_queue *q = bdev_get_queue(origin_bdev); - - return blk_queue_discard(q); -} - /* * If discard_passdown was enabled verify that the origin device * supports discards. Disable discard_passdown if not. @@ -3349,7 +3342,7 @@ static void disable_passdown_if_not_supported(struct cache *cache) if (!cache->features.discard_passdown) return; - if (!origin_dev_supports_discard(origin_bdev)) + if (!bdev_max_discard_sectors(origin_bdev)) reason = "discard unsupported"; else if (origin_limits->max_discard_sectors < cache->sectors_per_block) diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 128316a73d01..811b0a5379d0 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -2016,13 +2016,6 @@ static void clone_resume(struct dm_target *ti) do_waker(&clone->waker.work); } -static bool bdev_supports_discards(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - return (q && blk_queue_discard(q)); -} - /* * If discard_passdown was enabled verify that the destination device supports * discards. Disable discard_passdown if not. @@ -2036,7 +2029,7 @@ static void disable_passdown_if_not_supported(struct clone *clone) if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) return; - if (!bdev_supports_discards(dest_dev)) + if (!bdev_max_discard_sectors(dest_dev)) reason = "discard unsupported"; else if (dest_limits->max_discard_sectors < clone->region_size) reason = "max discard sectors smaller than a region"; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 5762366333a2..e4b95eaeec8c 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -311,7 +311,7 @@ static void do_region(int op, int op_flags, unsigned region, * Reject unsupported discard and write same requests. */ if (op == REQ_OP_DISCARD) - special_cmd_max_sectors = q->limits.max_discard_sectors; + special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev); else if (op == REQ_OP_WRITE_ZEROES) special_cmd_max_sectors = q->limits.max_write_zeroes_sectors; if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) && diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index c9d036d6bb2e..e194226c89e5 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -866,9 +866,8 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv, static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct log_writes_c *lc = ti->private; - struct request_queue *q = bdev_get_queue(lc->dev->bdev); - if (!q || !blk_queue_discard(q)) { + if (!bdev_max_discard_sectors(lc->dev->bdev)) { lc->device_supports_discard = false; limits->discard_granularity = lc->sectorsize; limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 2b26435a6946..9526ccbedafb 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2963,13 +2963,8 @@ static void configure_discard_support(struct raid_set *rs) raid456 = rs_is_raid456(rs); for (i = 0; i < rs->raid_disks; i++) { - struct request_queue *q; - - if (!rs->dev[i].rdev.bdev) - continue; - - q = bdev_get_queue(rs->dev[i].rdev.bdev); - if (!q || !blk_queue_discard(q)) + if (!rs->dev[i].rdev.bdev || + !bdev_max_discard_sectors(rs->dev[i].rdev.bdev)) return; if (raid456) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 03541cfc2317..e7d42f6335a2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1820,9 +1820,7 @@ static int device_dax_write_cache_enabled(struct dm_target *ti, static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !blk_queue_nonrot(q); + return !bdev_nonrot(dev->bdev); } static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, @@ -1890,9 +1888,7 @@ static bool dm_table_supports_nowait(struct dm_table *t) static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !blk_queue_discard(q); + return !bdev_max_discard_sectors(dev->bdev); } static bool dm_table_supports_discards(struct dm_table *t) @@ -1924,9 +1920,7 @@ static int device_not_secure_erase_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !blk_queue_secure_erase(q); + return !bdev_max_secure_erase_sectors(dev->bdev); } static bool dm_table_supports_secure_erase(struct dm_table *t) @@ -1952,9 +1946,7 @@ static int device_requires_stable_pages(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return blk_queue_stable_writes(q); + return bdev_stable_writes(dev->bdev); } int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, @@ -1974,18 +1966,15 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q); if (!dm_table_supports_discards(t)) { - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); - /* Must also clear discard limits... */ q->limits.max_discard_sectors = 0; q->limits.max_hw_discard_sectors = 0; q->limits.discard_granularity = 0; q->limits.discard_alignment = 0; q->limits.discard_misaligned = 0; - } else - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + } - if (dm_table_supports_secure_erase(t)) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); + if (!dm_table_supports_secure_erase(t)) + q->limits.max_secure_erase_sectors = 0; if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 4d25d0e27031..84c083f76673 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -398,8 +398,8 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da sector_t s = block_to_sectors(tc->pool, data_b); sector_t len = block_to_sectors(tc->pool, data_e - data_b); - return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, - GFP_NOWAIT, 0, &op->bio); + return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT, + &op->bio); } static void end_discard(struct discard_op *op, int r) @@ -2802,13 +2802,6 @@ static void requeue_bios(struct pool *pool) /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ -static bool data_dev_supports_discard(struct pool_c *pt) -{ - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - - return blk_queue_discard(q); -} - static bool is_factor(sector_t block_size, uint32_t n) { return !sector_div(block_size, n); @@ -2828,7 +2821,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) if (!pt->adjusted_pf.discard_passdown) return; - if (!data_dev_supports_discard(pt)) + if (!bdev_max_discard_sectors(pt->data_dev->bdev)) reason = "discard unsupported"; else if (data_limits->max_discard_sectors < pool->sectors_per_block) @@ -4057,8 +4050,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) /* * Must explicitly disallow stacking discard limits otherwise the * block layer will stack them if pool's data device has support. - * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the - * user to see that, so make sure to set all discard limits to 0. */ limits->discard_granularity = 0; return; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 82957bd460e8..39081338ca61 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -955,7 +955,6 @@ void disable_discard(struct mapped_device *md) /* device doesn't really support DISCARD, disable it */ limits->max_discard_sectors = 0; - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue); } void disable_write_zeroes(struct mapped_device *md) @@ -982,7 +981,7 @@ static void clone_endio(struct bio *bio) if (unlikely(error == BLK_STS_TARGET)) { if (bio_op(bio) == REQ_OP_DISCARD && - !q->limits.max_discard_sectors) + !bdev_max_discard_sectors(bio->bi_bdev)) disable_discard(md); else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && !q->limits.max_write_zeroes_sectors) diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 0f55b079371b..138a3b25c5c8 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -64,7 +64,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) struct linear_conf *conf; struct md_rdev *rdev; int i, cnt; - bool discard_supported = false; conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL); if (!conf) @@ -96,9 +95,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) conf->array_sectors += rdev->sectors; cnt++; - - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; } if (cnt != raid_disks) { pr_warn("md/linear:%s: not enough drives present. Aborting!\n", @@ -106,11 +102,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) goto out; } - if (!discard_supported) - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); - else - blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); - /* * Here we calculate the device offsets. */ @@ -252,7 +243,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) start_sector + data_offset; if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) { + !bdev_max_discard_sectors(bio->bi_bdev))) { /* Just ignore it */ bio_endio(bio); } else { diff --git a/drivers/md/md.c b/drivers/md/md.c index 309b3af906ad..2587f872c088 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5991,8 +5991,7 @@ int md_run(struct mddev *mddev) bool nonrot = true; rdev_for_each(rdev, mddev) { - if (rdev->raid_disk >= 0 && - !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) { + if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) { nonrot = false; break; } @@ -8585,7 +8584,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, { struct bio *discard_bio = NULL; - if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0, + if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, &discard_bio) || !discard_bio) return; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index b21e101183f4..7231f5e1eaa7 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,7 +399,6 @@ static int raid0_run(struct mddev *mddev) conf = mddev->private; if (mddev->queue) { struct md_rdev *rdev; - bool discard_supported = false; blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); @@ -412,13 +411,7 @@ static int raid0_run(struct mddev *mddev) rdev_for_each(rdev, mddev) { disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; } - if (!discard_supported) - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); - else - blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); } /* calculate array device size */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 99d5464a51f8..5aed2c8b746e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -165,9 +165,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) * Allocate bios : 1 for reading, n-1 for writing */ for (j = pi->raid_disks ; j-- ; ) { - bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); + bio = bio_kmalloc(RESYNC_PAGES, gfp_flags); if (!bio) goto out_free_bio; + bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0); r1_bio->bios[j] = bio; } /* @@ -206,8 +207,10 @@ out_free_pages: resync_free_pages(&rps[j]); out_free_bio: - while (++j < pi->raid_disks) - bio_put(r1_bio->bios[j]); + while (++j < pi->raid_disks) { + bio_uninit(r1_bio->bios[j]); + kfree(r1_bio->bios[j]); + } kfree(rps); out_free_r1bio: @@ -225,7 +228,8 @@ static void r1buf_pool_free(void *__r1_bio, void *data) for (i = pi->raid_disks; i--; ) { rp = get_resync_pages(r1bio->bios[i]); resync_free_pages(rp); - bio_put(r1bio->bios[i]); + bio_uninit(r1bio->bios[i]); + kfree(r1bio->bios[i]); } /* resync pages array stored in the 1st bio's .bi_private */ @@ -704,7 +708,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect /* At least two disks to choose from so failfast is OK */ set_bit(R1BIO_FailFast, &r1_bio->state); - nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); + nonrot = bdev_nonrot(rdev->bdev); has_nonrot_disk |= nonrot; pending = atomic_read(&rdev->nr_pending); dist = abs(this_sector - conf->mirrors[disk].head_position); @@ -802,7 +806,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio) if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) + !bdev_max_discard_sectors(bio->bi_bdev))) /* Just ignore it */ bio_endio(bio); else @@ -1826,8 +1830,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) break; } } - if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); return err; } @@ -3106,7 +3108,6 @@ static int raid1_run(struct mddev *mddev) int i; struct md_rdev *rdev; int ret; - bool discard_supported = false; if (mddev->level != 1) { pr_warn("md/raid1:%s: raid level not set to mirroring (%d)\n", @@ -3141,8 +3142,6 @@ static int raid1_run(struct mddev *mddev) continue; disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; } mddev->degraded = 0; @@ -3179,15 +3178,6 @@ static int raid1_run(struct mddev *mddev) md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); - if (mddev->queue) { - if (discard_supported) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, - mddev->queue); - else - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, - mddev->queue); - } - ret = md_integrity_register(mddev); if (ret) { md_unregister_thread(&mddev->thread); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index dfe7d62d3fbd..834eb3ba95a6 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -145,15 +145,17 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) * Allocate bios. */ for (j = nalloc ; j-- ; ) { - bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); + bio = bio_kmalloc(RESYNC_PAGES, gfp_flags); if (!bio) goto out_free_bio; + bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0); r10_bio->devs[j].bio = bio; if (!conf->have_replacement) continue; - bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); + bio = bio_kmalloc(RESYNC_PAGES, gfp_flags); if (!bio) goto out_free_bio; + bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0); r10_bio->devs[j].repl_bio = bio; } /* @@ -197,9 +199,11 @@ out_free_pages: out_free_bio: for ( ; j < nalloc; j++) { if (r10_bio->devs[j].bio) - bio_put(r10_bio->devs[j].bio); + bio_uninit(r10_bio->devs[j].bio); + kfree(r10_bio->devs[j].bio); if (r10_bio->devs[j].repl_bio) - bio_put(r10_bio->devs[j].repl_bio); + bio_uninit(r10_bio->devs[j].repl_bio); + kfree(r10_bio->devs[j].repl_bio); } kfree(rps); out_free_r10bio: @@ -220,12 +224,15 @@ static void r10buf_pool_free(void *__r10_bio, void *data) if (bio) { rp = get_resync_pages(bio); resync_free_pages(rp); - bio_put(bio); + bio_uninit(bio); + kfree(bio); } bio = r10bio->devs[j].repl_bio; - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } } /* resync pages array stored in the 1st bio's .bi_private */ @@ -796,7 +803,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, if (!do_balance) break; - nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); + nonrot = bdev_nonrot(rdev->bdev); has_nonrot_disk |= nonrot; pending = atomic_read(&rdev->nr_pending); if (min_pending > pending && nonrot) { @@ -888,7 +895,7 @@ static void flush_pending_writes(struct r10conf *conf) if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) + !bdev_max_discard_sectors(bio->bi_bdev))) /* Just ignore it */ bio_endio(bio); else @@ -1083,7 +1090,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) + !bdev_max_discard_sectors(bio->bi_bdev))) /* Just ignore it */ bio_endio(bio); else @@ -2144,8 +2151,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) rcu_assign_pointer(p->rdev, rdev); break; } - if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); return err; @@ -4069,7 +4074,6 @@ static int raid10_run(struct mddev *mddev) sector_t size; sector_t min_offset_diff = 0; int first = 1; - bool discard_supported = false; if (mddev_init_writes_pending(mddev) < 0) return -ENOMEM; @@ -4140,20 +4144,9 @@ static int raid10_run(struct mddev *mddev) rdev->data_offset << 9); disk->head_position = 0; - - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; first = 0; } - if (mddev->queue) { - if (discard_supported) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, - mddev->queue); - else - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, - mddev->queue); - } /* need to check that every block has at least one working mirror */ if (!enough(conf, -1)) { pr_err("md/raid10:%s: not enough operational mirrors.\n", diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index a7d50ff9020a..094a4042589e 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1318,7 +1318,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, r5l_write_super(log, end); - if (!blk_queue_discard(bdev_get_queue(bdev))) + if (!bdev_max_discard_sectors(bdev)) return; mddev = log->rdev->mddev; @@ -1344,14 +1344,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, if (log->last_checkpoint < end) { blkdev_issue_discard(bdev, log->last_checkpoint + log->rdev->data_offset, - end - log->last_checkpoint, GFP_NOIO, 0); + end - log->last_checkpoint, GFP_NOIO); } else { blkdev_issue_discard(bdev, log->last_checkpoint + log->rdev->data_offset, log->device_size - log->last_checkpoint, - GFP_NOIO, 0); + GFP_NOIO); blkdev_issue_discard(bdev, log->rdev->data_offset, end, - GFP_NOIO, 0); + GFP_NOIO); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 351d341a1ffa..59f91e392a2a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7242,7 +7242,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) rdev_for_each(rdev, mddev) { if (test_bit(Journal, &rdev->flags)) continue; - if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) { + if (bdev_nonrot(rdev->bdev)) { conf->batch_bio_dispatch = false; break; } @@ -7776,14 +7776,10 @@ static int raid5_run(struct mddev *mddev) * A better idea might be to turn DISCARD into WRITE_ZEROES * requests, as that is required to be safe. */ - if (devices_handle_discard_safely && - mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && - mddev->queue->limits.discard_granularity >= stripe) - blk_queue_flag_set(QUEUE_FLAG_DISCARD, - mddev->queue); - else - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, - mddev->queue); + if (!devices_handle_discard_safely || + mddev->queue->limits.max_discard_sectors < (stripe >> 9) || + mddev->queue->limits.discard_granularity < stripe) + blk_queue_max_discard_sectors(mddev->queue, 0); blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); } diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index c69b2d9df6f1..a3d446005571 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -183,14 +183,13 @@ static void mmc_queue_setup_discard(struct request_queue *q, if (!max_discard) return; - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); blk_queue_max_discard_sectors(q, max_discard); q->limits.discard_granularity = card->pref_erase << 9; /* granularity must not be greater than max. discard */ if (card->pref_erase > max_discard) q->limits.discard_granularity = SECTOR_SIZE; if (mmc_can_secure_erase_trim(card)) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); + blk_queue_max_secure_erase_sectors(q, max_discard); } static unsigned short mmc_get_max_segments(struct mmc_host *host) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 64d2b093f114..f73172111465 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -377,7 +377,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); if (tr->discard) { - blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq); blk_queue_max_discard_sectors(new->rq, UINT_MAX); new->rq->limits.discard_granularity = tr->blksize; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1a984045e49c..c951d7b2cf48 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1621,7 +1621,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) u32 size = queue_logical_block_size(queue); if (ctrl->max_discard_sectors == 0) { - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); + blk_queue_max_discard_sectors(queue, 0); return; } @@ -1632,7 +1632,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) queue->limits.discard_granularity = size; /* If discard is already enabled, don't reset queue limits */ - if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) + if (queue->limits.max_discard_sectors) return; blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 080f85f4105f..7ae72c7a211b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3831,6 +3831,9 @@ process_local_list: return count; } +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +#ifdef CONFIG_BLK_CGROUP_FC_APPID /* Parse the cgroup id from a buf and return the length of cgrpid */ static int fc_parse_cgrpid(const char *buf, u64 *id) { @@ -3854,12 +3857,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id) } /* - * fc_update_appid: Parse and update the appid in the blkcg associated with - * cgroupid. - * @buf: buf contains both cgrpid and appid info - * @count: size of the buffer + * Parse and update the appid in the blkcg associated with the cgroupid. */ -static int fc_update_appid(const char *buf, size_t count) +static ssize_t fc_appid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { u64 cgrp_id; int appid_len = 0; @@ -3887,23 +3888,14 @@ static int fc_update_appid(const char *buf, size_t count) return ret; return count; } - -static ssize_t fc_appid_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int ret = 0; - - ret = fc_update_appid(buf, count); - if (ret < 0) - return -EINVAL; - return count; -} -static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store); +#endif /* CONFIG_BLK_CGROUP_FC_APPID */ static struct attribute *nvme_fc_attrs[] = { &dev_attr_nvme_discovery.attr, +#ifdef CONFIG_BLK_CGROUP_FC_APPID &dev_attr_appid_store.attr, +#endif NULL }; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index d886c2c59554..27a72504d31c 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, ret = __blkdev_issue_discard(ns->bdev, nvmet_lba_to_sect(ns, range->slba), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), - GFP_KERNEL, 0, bio); + GFP_KERNEL, bio); if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range->slba); return errno_to_nvme_status(req, ret); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index e34718b09550..82b61acf7a72 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z, bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) { - struct request_queue *q = ns->bdev->bd_disk->queue; - u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q)); + u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev)); struct gendisk *bd_disk = ns->bdev->bd_disk; int ret; diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index e084f4dedddd..8bd5665db919 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -791,7 +791,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block) blk_queue_max_discard_sectors(q, max_discard_sectors); blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static int dasd_fba_pe_handler(struct dasd_device *device, diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index ba9dbb51b75f..f6b83853f7ee 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5528,7 +5528,9 @@ static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd) { struct bio *bio = scsi_cmd_to_rq(cmd)->bio; - return bio ? blkcg_get_fc_appid(bio) : NULL; + if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !bio) + return NULL; + return blkcg_get_fc_appid(bio); } /** diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index dc6e55761fd1..9694e2cfaf9a 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -797,7 +797,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) case SD_LBP_FULL: case SD_LBP_DISABLE: blk_queue_max_discard_sectors(q, 0); - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); return; case SD_LBP_UNMAP: @@ -830,7 +829,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) } blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 44bb380e7390..25f33eb25337 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -829,28 +829,26 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) } /* - * Check if the underlying struct block_device request_queue supports - * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM - * in ATA and we need to set TPE=1 + * Check if the underlying struct block_device supports discard and if yes + * configure the UNMAP parameters. */ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, - struct request_queue *q) + struct block_device *bdev) { - int block_size = queue_logical_block_size(q); + int block_size = bdev_logical_block_size(bdev); - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(bdev)) return false; attrib->max_unmap_lba_count = - q->limits.max_discard_sectors >> (ilog2(block_size) - 9); + bdev_max_discard_sectors(bdev) >> (ilog2(block_size) - 9); /* * Currently hardcoded to 1 in Linux/SCSI code.. */ attrib->max_unmap_block_desc_count = 1; - attrib->unmap_granularity = q->limits.discard_granularity / block_size; - attrib->unmap_granularity_alignment = q->limits.discard_alignment / - block_size; - attrib->unmap_zeroes_data = !!(q->limits.max_write_zeroes_sectors); + attrib->unmap_granularity = bdev_discard_granularity(bdev) / block_size; + attrib->unmap_granularity_alignment = + bdev_discard_alignment(bdev) / block_size; return true; } EXPORT_SYMBOL(target_configure_unmap_from_queue); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 8190b840065f..e68f1cc8ef98 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -134,10 +134,10 @@ static int fd_configure_device(struct se_device *dev) */ inode = file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { - struct request_queue *q = bdev_get_queue(I_BDEV(inode)); + struct block_device *bdev = I_BDEV(inode); unsigned long long dev_size; - fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode)); + fd_dev->fd_block_size = bdev_logical_block_size(bdev); /* * Determine the number of bytes from i_size_read() minus * one (1) logical sector from underlying struct block_device @@ -150,7 +150,7 @@ static int fd_configure_device(struct se_device *dev) dev_size, div_u64(dev_size, fd_dev->fd_block_size), fd_dev->fd_block_size); - if (target_configure_unmap_from_queue(&dev->dev_attrib, q)) + if (target_configure_unmap_from_queue(&dev->dev_attrib, bdev)) pr_debug("IFILE: BLOCK Discard support available," " disabled by default\n"); /* @@ -159,7 +159,7 @@ static int fd_configure_device(struct se_device *dev) */ dev->dev_attrib.max_write_same_len = 0xFFFF; - if (blk_queue_nonrot(q)) + if (bdev_nonrot(bdev)) dev->dev_attrib.is_nonrot = 1; } else { if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) { @@ -558,7 +558,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) ret = blkdev_issue_discard(bdev, target_to_linux_sector(dev, lba), target_to_linux_sector(dev, nolb), - GFP_KERNEL, 0); + GFP_KERNEL); if (ret < 0) { pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n", ret); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 87ede165ddba..378c80313a0f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -119,7 +119,7 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; - if (target_configure_unmap_from_queue(&dev->dev_attrib, q)) + if (target_configure_unmap_from_queue(&dev->dev_attrib, bd)) pr_debug("IBLOCK: BLOCK Discard support available," " disabled by default\n"); @@ -133,7 +133,7 @@ static int iblock_configure_device(struct se_device *dev) else dev->dev_attrib.max_write_same_len = 0xFFFF; - if (blk_queue_nonrot(q)) + if (bdev_nonrot(bd)) dev->dev_attrib.is_nonrot = 1; bi = bdev_get_integrity(bd); @@ -434,7 +434,7 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) ret = blkdev_issue_discard(bdev, target_to_linux_sector(dev, lba), target_to_linux_sector(dev, nolb), - GFP_KERNEL, 0); + GFP_KERNEL); if (ret < 0) { pr_err("blkdev_issue_discard() failed: %d\n", ret); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -727,17 +727,16 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (data_direction == DMA_TO_DEVICE) { struct iblock_dev *ib_dev = IBLOCK_DEV(dev); - struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd); /* * Force writethrough using REQ_FUA if a volatile write cache * is not enabled, or if initiator set the Force Unit Access bit. */ opf = REQ_OP_WRITE; miter_dir = SG_MITER_TO_SG; - if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) { + if (bdev_fua(ib_dev->ibd_bd)) { if (cmd->se_cmd_flags & SCF_FUA) opf |= REQ_FUA; - else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + else if (!bdev_write_cache(ib_dev->ibd_bd)) opf |= REQ_FUA; } } else { @@ -886,11 +885,7 @@ iblock_parse_cdb(struct se_cmd *cmd) static bool iblock_get_write_cache(struct se_device *dev) { - struct iblock_dev *ib_dev = IBLOCK_DEV(dev); - struct block_device *bd = ib_dev->ibd_bd; - struct request_queue *q = bdev_get_queue(bd); - - return test_bit(QUEUE_FLAG_WC, &q->queue_flags); + return bdev_write_cache(IBLOCK_DEV(dev)->ibd_bd); } static const struct target_backend_ops iblock_ops = { diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 60dafe4c581b..bb3fb18b2316 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -818,24 +818,8 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b) static void pscsi_bi_endio(struct bio *bio) { - bio_put(bio); -} - -static inline struct bio *pscsi_get_bio(int nr_vecs) -{ - struct bio *bio; - /* - * Use bio_malloc() following the comment in for bio -> struct request - * in block/blk-core.c:blk_make_request() - */ - bio = bio_kmalloc(GFP_KERNEL, nr_vecs); - if (!bio) { - pr_err("PSCSI: bio_kmalloc() failed\n"); - return NULL; - } - bio->bi_end_io = pscsi_bi_endio; - - return bio; + bio_uninit(bio); + kfree(bio); } static sense_reason_t @@ -878,15 +862,12 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (!bio) { new_bio: nr_vecs = bio_max_segs(nr_pages); - /* - * Calls bio_kmalloc() and sets bio->bi_end_io() - */ - bio = pscsi_get_bio(nr_vecs); + bio = bio_kmalloc(nr_vecs, GFP_KERNEL); if (!bio) goto fail; - - if (rw) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, + rw ? REQ_OP_WRITE : REQ_OP_READ); + bio->bi_end_io = pscsi_bi_endio; pr_debug("PSCSI: Allocated bio: %p," " dir: %s nr_vecs: %d\n", bio, @@ -912,11 +893,6 @@ new_bio: goto fail; } - /* - * Clear the pointer so that another bio will - * be allocated with pscsi_get_bio() above. - */ - bio = NULL; goto new_bio; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 31c3f592e587..84795d831282 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4238,6 +4238,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) */ static void btrfs_end_empty_barrier(struct bio *bio) { + bio_uninit(bio); complete(bio->bi_private); } @@ -4247,7 +4248,7 @@ static void btrfs_end_empty_barrier(struct bio *bio) */ static void write_dev_flush(struct btrfs_device *device) { - struct bio *bio = device->flush_bio; + struct bio *bio = &device->flush_bio; #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY /* @@ -4260,12 +4261,12 @@ static void write_dev_flush(struct btrfs_device *device) * of simplicity, since this is a debug tool and not meant for use in * non-debug builds. */ - struct request_queue *q = bdev_get_queue(device->bdev); - if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + if (!bdev_write_cache(device->bdev)) return; #endif - bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH); + bio_init(bio, device->bdev, NULL, 0, + REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH); bio->bi_end_io = btrfs_end_empty_barrier; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; @@ -4279,7 +4280,7 @@ static void write_dev_flush(struct btrfs_device *device) */ static blk_status_t wait_dev_flush(struct btrfs_device *device) { - struct bio *bio = device->flush_bio; + struct bio *bio = &device->flush_bio; if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) return BLK_STS_OK; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6aa92f84f465..6260784e74b5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1239,7 +1239,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, if (size) { ret = blkdev_issue_discard(bdev, start >> 9, size >> 9, - GFP_NOFS, 0); + GFP_NOFS); if (!ret) *discarded_bytes += size; else if (ret != -EOPNOTSUPP) @@ -1256,7 +1256,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, if (bytes_left) { ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9, - GFP_NOFS, 0); + GFP_NOFS); if (!ret) *discarded_bytes += bytes_left; } @@ -1291,7 +1291,7 @@ static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes) ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len, &discarded); discarded += src_disc; - } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) { + } else if (bdev_max_discard_sectors(stripe->dev->bdev)) { ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded); } else { ret = 0; @@ -5987,7 +5987,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) *trimmed = 0; /* Discard not supported = nothing to do. */ - if (!blk_queue_discard(bdev_get_queue(device->bdev))) + if (!bdev_max_discard_sectors(device->bdev)) return 0; /* Not writable = nothing to do. */ diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index be6c24577dbe..4b28aaea2702 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -468,7 +468,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_device *device; - struct request_queue *q; struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; @@ -498,14 +497,11 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, rcu_read_lock(); list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, dev_list) { - if (!device->bdev) + if (!device->bdev || !bdev_max_discard_sectors(device->bdev)) continue; - q = bdev_get_queue(device->bdev); - if (blk_queue_discard(q)) { - num_devices++; - minlen = min_t(u64, q->limits.discard_granularity, - minlen); - } + num_devices++; + minlen = min_t(u64, bdev_discard_granularity(device->bdev), + minlen); } rcu_read_unlock(); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a8cc736731fd..b6b00338037c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -405,7 +405,6 @@ void btrfs_free_device(struct btrfs_device *device) WARN_ON(!list_empty(&device->post_commit_list)); rcu_string_free(device->name); extent_io_tree_release(&device->alloc_state); - bio_put(device->flush_bio); btrfs_destroy_dev_zone_info(device); kfree(device); } @@ -643,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); } - if (!blk_queue_nonrot(bdev_get_queue(bdev))) + if (!bdev_nonrot(bdev)) fs_devices->rotating = true; device->bdev = bdev; @@ -2706,7 +2705,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path atomic64_add(device->total_bytes, &fs_info->free_chunk_space); - if (!blk_queue_nonrot(bdev_get_queue(bdev))) + if (!bdev_nonrot(bdev)) fs_devices->rotating = true; orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy); @@ -6949,16 +6948,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, if (!dev) return ERR_PTR(-ENOMEM); - /* - * Preallocate a bio that's always going to be used for flushing device - * barriers and matches the device lifespan - */ - dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0); - if (!dev->flush_bio) { - kfree(dev); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); INIT_LIST_HEAD(&dev->post_commit_list); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f3e28f11cfb6..b11c563d2025 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -121,8 +121,8 @@ struct btrfs_device { /* bytes used on the current transaction */ u64 commit_bytes_used; - /* for sending down flush barriers */ - struct bio *flush_bio; + /* Bio used for flushing device barriers */ + struct bio flush_bio; struct completion flush_wait; /* per-device scrub information */ diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index d31b0eda210f..29b54fd9c128 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -350,7 +350,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_zoned_device_info *zone_info = NULL; struct block_device *bdev = device->bdev; - struct request_queue *queue = bdev_get_queue(bdev); unsigned int max_active_zones; unsigned int nactive; sector_t nr_sectors; @@ -410,7 +409,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; - max_active_zones = queue_max_active_zones(queue); + max_active_zones = bdev_max_active_zones(bdev); if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { btrfs_err_in_rcu(fs_info, "zoned: %s: max active zones %u is too small, need at least %u active zones", diff --git a/fs/direct-io.c b/fs/direct-io.c index aef06e607b40..840752006f60 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1115,11 +1115,10 @@ static inline int drop_refcount(struct dio *dio) * individual fields and will generate much worse code. This is important * for the whole file. */ -static inline ssize_t -do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) { unsigned i_blkbits = READ_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; @@ -1334,29 +1333,6 @@ fail_dio: kmem_cache_free(dio_cache, dio); return retval; } - -ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, - get_block_t get_block, - dio_iodone_t end_io, dio_submit_t submit_io, - int flags) -{ - /* - * The block device state is needed in the end to finally - * submit everything. Since it's likely to be cache cold - * prefetch it here as first thing to hide some of the - * latency. - * - * Attempt to prefetch the pieces we likely need later. - */ - prefetch(&bdev->bd_disk->part_tbl); - prefetch(bdev->bd_disk->queue); - prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES); - - return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block, - end_io, submit_io, flags); -} - EXPORT_SYMBOL(__blockdev_direct_IO); static __init int dio_init(void) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 2f5130059236..20d4e47f57ab 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -351,21 +351,20 @@ out: static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg) { - struct request_queue *q = bdev_get_queue(inode->i_sb->s_bdev); struct fstrim_range range; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(inode->i_sb->s_bdev)) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; range.minlen = max_t(unsigned int, range.minlen, - q->limits.discard_granularity); + bdev_discard_granularity(inode->i_sb->s_bdev)); ret = exfat_trim_fs(inode, &range); if (ret < 0) diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 8ca21e7917d1..be0788ecaf20 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -627,13 +627,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) if (opts->allow_utime == (unsigned short)-1) opts->allow_utime = ~opts->fs_dmask & 0022; - if (opts->discard) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - - if (!blk_queue_discard(q)) { - exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard"); - opts->discard = 0; - } + if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) { + exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard"); + opts->discard = 0; } sb->s_flags |= SB_NODIRATIME; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ba44fa1be70a..4d1d2326eee9 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1044,7 +1044,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) __u32 flags = 0; unsigned int flush_flags = 0; struct super_block *sb = file_inode(filp)->i_sb; - struct request_queue *q; if (copy_from_user(&flags, (__u32 __user *)arg, sizeof(__u32))) @@ -1065,10 +1064,8 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) return -EINVAL; - q = bdev_get_queue(EXT4_SB(sb)->s_journal->j_dev); - if (!q) - return -ENXIO; - if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q)) + if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && + !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev)) return -EOPNOTSUPP; if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN) @@ -1393,14 +1390,13 @@ resizefs_out: case FITRIM: { - struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(sb->s_bdev)) return -EOPNOTSUPP; /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 252c168454c7..ea653d19f9ec 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3498,7 +3498,7 @@ int ext4_mb_init(struct super_block *sb) spin_lock_init(&lg->lg_prealloc_lock); } - if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev))) + if (bdev_nonrot(sb->s_bdev)) sbi->s_mb_max_linear_groups = 0; else sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT; @@ -3629,7 +3629,7 @@ static inline int ext4_issue_discard(struct super_block *sb, return __blkdev_issue_discard(sb->s_bdev, (sector_t)discard_block << (sb->s_blocksize_bits - 9), (sector_t)count << (sb->s_blocksize_bits - 9), - GFP_NOFS, 0, biop); + GFP_NOFS, biop); } else return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); } @@ -6455,7 +6455,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); + unsigned int discard_granularity = bdev_discard_granularity(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -6475,9 +6475,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) range->len < sb->s_blocksize) return -EINVAL; /* No point to try to trim less than discard granularity */ - if (range->minlen < q->limits.discard_granularity) { + if (range->minlen < discard_granularity) { minlen = EXT4_NUM_B2C(EXT4_SB(sb), - q->limits.discard_granularity >> sb->s_blocksize_bits); + discard_granularity >> sb->s_blocksize_bits); if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) goto out; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1466fbdbc8e3..6900da973ce2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5474,13 +5474,9 @@ no_journal: goto failed_mount9; } - if (test_opt(sb, DISCARD)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) - ext4_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) + ext4_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but the device does not support discard"); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8c570de21ed5..2b2b3c87e45e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4372,8 +4372,7 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) static inline bool f2fs_bdev_support_discard(struct block_device *bdev) { - return blk_queue_discard(bdev_get_queue(bdev)) || - bdev_is_zoned(bdev); + return bdev_max_discard_sectors(bdev) || bdev_is_zoned(bdev); } static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5b89af0f27f0..35b6c720c2bc 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2285,7 +2285,6 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; - struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret; @@ -2304,7 +2303,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) return ret; range.minlen = max((unsigned int)range.minlen, - q->limits.discard_granularity); + bdev_discard_granularity(sb->s_bdev)); ret = f2fs_trim_fs(F2FS_SB(sb), &range); mnt_drop_write_file(filp); if (ret < 0) @@ -3686,18 +3685,18 @@ out: static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, pgoff_t off, block_t block, block_t len, u32 flags) { - struct request_queue *q = bdev_get_queue(bdev); sector_t sector = SECTOR_FROM_BLOCK(block); sector_t nr_sects = SECTOR_FROM_BLOCK(len); int ret = 0; - if (!q) - return -ENXIO; - - if (flags & F2FS_TRIM_FILE_DISCARD) - ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, - blk_queue_secure_erase(q) ? - BLKDEV_DISCARD_SECURE : 0); + if (flags & F2FS_TRIM_FILE_DISCARD) { + if (bdev_max_secure_erase_sectors(bdev)) + ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, + GFP_NOFS); + else + ret = blkdev_issue_discard(bdev, sector, nr_sects, + GFP_NOFS); + } if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { if (IS_ENCRYPTED(inode)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bd9731cdec56..7225ce09f3ab 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1196,9 +1196,8 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, unsigned int *issued) { struct block_device *bdev = dc->bdev; - struct request_queue *q = bdev_get_queue(bdev); unsigned int max_discard_blocks = - SECTOR_TO_BLOCK(q->limits.max_discard_sectors); + SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev)); struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); @@ -1245,7 +1244,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, err = __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), SECTOR_FROM_BLOCK(len), - GFP_NOFS, 0, &bio); + GFP_NOFS, &bio); submit: if (err) { spin_lock_irqsave(&dc->lock, flags); @@ -1375,9 +1374,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct discard_cmd *dc; struct discard_info di = {0}; struct rb_node **insert_p = NULL, *insert_parent = NULL; - struct request_queue *q = bdev_get_queue(bdev); unsigned int max_discard_blocks = - SECTOR_TO_BLOCK(q->limits.max_discard_sectors); + SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev)); block_t end = lstart + len; dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, diff --git a/fs/fat/file.c b/fs/fat/file.c index a5a309fcc7fa..bf91f977debe 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -127,13 +127,12 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg) struct super_block *sb = inode->i_sb; struct fstrim_range __user *user_range; struct fstrim_range range; - struct request_queue *q = bdev_get_queue(sb->s_bdev); int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(sb->s_bdev)) return -EOPNOTSUPP; user_range = (struct fstrim_range __user *)arg; @@ -141,7 +140,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg) return -EFAULT; range.minlen = max_t(unsigned int, range.minlen, - q->limits.discard_granularity); + bdev_discard_granularity(sb->s_bdev)); err = fat_trim_fs(inode, &range); if (err < 0) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index bf6051bdf1d1..3d1afb95a925 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1872,13 +1872,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, goto out_fail; } - if (sbi->options.discard) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) - fat_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + if (sbi->options.discard && !bdev_max_discard_sectors(sb->s_bdev)) + fat_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but the device does not support discard"); fat_set_state(sb, 1, 0); return 0; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 801ad9f4f2be..6d26bb525484 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1386,7 +1386,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) { struct inode *inode = file_inode(filp); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); + struct block_device *bdev = sdp->sd_vfs->s_bdev; struct buffer_head *bh; struct gfs2_rgrpd *rgd; struct gfs2_rgrpd *rgd_end; @@ -1405,7 +1405,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (copy_from_user(&r, argp, sizeof(r))) @@ -1418,8 +1418,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); - minlen = max_t(u64, minlen, - q->limits.discard_granularity) >> bs_shift; + minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) return -EINVAL; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b08f5dc31780..80f9b047aa1b 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter, { atomic_inc(&dio->ref); - if (dio->iocb->ki_flags & IOCB_HIPRI) { + /* Sync dio can't be polled reliably */ + if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) { bio_set_polled(bio, dio->iocb); dio->submit.poll_bio = bio; } @@ -265,8 +266,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, * cache flushes on IO completion. */ if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) && - (dio->flags & IOMAP_DIO_WRITE_FUA) && - blk_queue_fua(bdev_get_queue(iomap->bdev))) + (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev)) use_fua = true; } @@ -654,9 +654,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->submit.waiter)) break; - if (!dio->submit.poll_bio || - !bio_poll(dio->submit.poll_bio, NULL, 0)) - blk_io_schedule(); + blk_io_schedule(); } __set_current_state(TASK_RUNNING); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fcacafa4510d..c0cbeeaec2d1 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1762,7 +1762,6 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) unsigned long block, log_offset; /* logical */ unsigned long long phys_block, block_start, block_stop; /* physical */ loff_t byte_start, byte_stop, byte_count; - struct request_queue *q = bdev_get_queue(journal->j_dev); /* flags must be set to either discard or zeroout */ if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags || @@ -1770,10 +1769,8 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) (flags & JBD2_JOURNAL_FLUSH_ZEROOUT))) return -EINVAL; - if (!q) - return -ENXIO; - - if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q)) + if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && + !bdev_max_discard_sectors(journal->j_dev)) return -EOPNOTSUPP; /* @@ -1828,7 +1825,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) err = blkdev_issue_discard(journal->j_dev, byte_start >> SECTOR_SHIFT, byte_count >> SECTOR_SHIFT, - GFP_NOFS, 0); + GFP_NOFS); } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) { err = blkdev_issue_zeroout(journal->j_dev, byte_start >> SECTOR_SHIFT, diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 03a845ab4f00..1e7b177ece60 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -110,14 +110,13 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case FITRIM: { struct super_block *sb = inode->i_sb; - struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; s64 ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) { + if (!bdev_max_discard_sectors(sb->s_bdev)) { jfs_warn("FITRIM not supported on device"); return -EOPNOTSUPP; } @@ -127,7 +126,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EFAULT; range.minlen = max_t(unsigned int, range.minlen, - q->limits.discard_granularity); + bdev_discard_granularity(sb->s_bdev)); ret = jfs_ioc_trim(inode, &range); if (ret < 0) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index f1a13a74cddf..85d4f44f2ac4 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -372,19 +372,16 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, } case Opt_discard: - { - struct request_queue *q = bdev_get_queue(sb->s_bdev); /* if set to 1, even copying files will cause * trimming :O * -> user has more control over the online trimming */ sbi->minblks_trim = 64; - if (blk_queue_discard(q)) + if (bdev_max_discard_sectors(sb->s_bdev)) *flag |= JFS_DISCARD; else pr_err("JFS: discard option not supported on device\n"); break; - } case Opt_nodiscard: *flag &= ~JFS_DISCARD; @@ -392,10 +389,9 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_discard_minblk: { - struct request_queue *q = bdev_get_queue(sb->s_bdev); char *minblks_trim = args[0].from; int rc; - if (blk_queue_discard(q)) { + if (bdev_max_discard_sectors(sb->s_bdev)) { *flag |= JFS_DISCARD; rc = kstrtouint(minblks_trim, 0, &sbi->minblks_trim); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index fec194a666f4..87e1004b606d 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1052,20 +1052,20 @@ out: static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) { struct the_nilfs *nilfs = inode->i_sb->s_fs_info; - struct request_queue *q = bdev_get_queue(nilfs->ns_bdev); struct fstrim_range range; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(nilfs->ns_bdev)) return -EOPNOTSUPP; if (copy_from_user(&range, argp, sizeof(range))) return -EFAULT; - range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity); + range.minlen = max_t(u64, range.minlen, + bdev_discard_granularity(nilfs->ns_bdev)); down_read(&nilfs->ns_segctor_sem); ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index e385cca2004a..77ff8e95421f 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -1100,7 +1100,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (ret < 0) { put_bh(su_bh); goto out_sem; @@ -1134,7 +1134,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (!ret) ndiscarded += nblocks; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index dd48a8f74d57..3b4a079c9617 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -672,7 +672,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (ret < 0) return ret; nblocks = 0; @@ -682,7 +682,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); return ret; } diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 787b53b984ee..15806eeae217 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -22,20 +22,20 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) { struct fstrim_range __user *user_range; struct fstrim_range range; - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(sbi->sb->s_bdev)) return -EOPNOTSUPP; user_range = (struct fstrim_range __user *)arg; if (copy_from_user(&range, user_range, sizeof(range))) return -EFAULT; - range.minlen = max_t(u32, range.minlen, q->limits.discard_granularity); + range.minlen = max_t(u32, range.minlen, + bdev_discard_granularity(sbi->sb->s_bdev)); err = ntfs_trim_fs(sbi, &range); if (err < 0) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 278dcf502410..5781b9e8e3d8 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -882,7 +882,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) int err; struct ntfs_sb_info *sbi = sb->s_fs_info; struct block_device *bdev = sb->s_bdev; - struct request_queue *rq; struct inode *inode; struct ntfs_inode *ni; size_t i, tt; @@ -912,15 +911,14 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) goto out; } - rq = bdev_get_queue(bdev); - if (blk_queue_discard(rq) && rq->limits.discard_granularity) { - sbi->discard_granularity = rq->limits.discard_granularity; + if (bdev_max_discard_sectors(bdev) && bdev_discard_granularity(bdev)) { + sbi->discard_granularity = bdev_discard_granularity(bdev); sbi->discard_granularity_mask_inv = ~(u64)(sbi->discard_granularity - 1); } /* Parse boot. */ - err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512, + err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev), bdev_nr_bytes(bdev)); if (err) goto out; @@ -1335,7 +1333,7 @@ int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len) return 0; err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9, - GFP_NOFS, 0); + GFP_NOFS); if (err == -EOPNOTSUPP) sbi->flags |= NTFS_FLAGS_NODISCARD; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index f59461d85da4..afd54ec66103 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -903,20 +903,19 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case FITRIM: { struct super_block *sb = inode->i_sb; - struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(sb->s_bdev)) return -EOPNOTSUPP; if (copy_from_user(&range, argp, sizeof(range))) return -EFAULT; - range.minlen = max_t(u64, q->limits.discard_granularity, + range.minlen = max_t(u64, bdev_discard_granularity(sb->s_bdev), range.minlen); ret = ocfs2_trim_fs(sb, &range); if (ret < 0) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 622c844f6d11..8879d052f96c 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -86,17 +86,10 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, int error, i; struct bio *bio; - if (page_count <= BIO_MAX_VECS) { - bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO); - } else { - bio = bio_kmalloc(GFP_NOIO, page_count); - bio_set_dev(bio, sb->s_bdev); - bio->bi_opf = REQ_OP_READ; - } - + bio = bio_kmalloc(page_count, GFP_NOIO); if (!bio) return -ENOMEM; - + bio_init(bio, sb->s_bdev, bio->bi_inline_vecs, page_count, REQ_OP_READ); bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT); for (i = 0; i < page_count; ++i) { @@ -126,7 +119,8 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, out_free_bio: bio_free_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); return error; } @@ -190,7 +184,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, length |= data[0] << 8; } bio_free_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); compressed = SQUASHFS_COMPRESSED(length); length = SQUASHFS_COMPRESSED_SIZE(length); @@ -224,7 +219,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, out_free_bio: bio_free_pages(bio); - bio_put(bio); + bio_uninit(bio); + kfree(bio); out: if (res < 0) { ERROR("Failed to read block 0x%llx: %d\n", index, res); diff --git a/fs/super.c b/fs/super.c index f1d4a193602d..60f57c7bc0a6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1204,7 +1204,7 @@ static int set_bdev_super(struct super_block *s, void *data) s->s_dev = s->s_bdev->bd_dev; s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi); - if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue)) + if (bdev_stable_writes(s->s_bdev)) s->s_iflags |= SB_I_STABLE_WRITES; return 0; } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 0191de8ce9ce..c6fe3f6ebb6b 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -114,7 +114,7 @@ xfs_trim_extents( } trace_xfs_discard_extent(mp, agno, fbno, flen); - error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); + error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS); if (error) goto out_del_cursor; *blocks_trimmed += flen; @@ -152,8 +152,8 @@ xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { - struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev); - unsigned int granularity = q->limits.discard_granularity; + unsigned int granularity = + bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); struct fstrim_range range; xfs_daddr_t start, end, minlen; xfs_agnumber_t start_agno, end_agno, agno; @@ -162,7 +162,7 @@ xfs_ioc_trim( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) return -EOPNOTSUPP; /* diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ba57323bfdce..c9f55e4f0957 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -605,7 +605,7 @@ xlog_discard_busy_extents( error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, 0, &bio); + GFP_NOFS, &bio); if (error && error != -EOPNOTSUPP) { xfs_info(mp, "discard failed for extent [0x%llx,%u], error %d", diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 54be9d64093e..a276b8111f63 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1608,14 +1608,10 @@ xfs_fs_fill_super( goto out_filestream_unmount; } - if (xfs_has_discard(mp)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - - if (!blk_queue_discard(q)) { - xfs_warn(mp, "mounting with \"discard\" option, but " - "the device does not support discard"); - mp->m_features &= ~XFS_FEAT_DISCARD; - } + if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) { + xfs_warn(mp, + "mounting with \"discard\" option, but the device does not support discard"); + mp->m_features &= ~XFS_FEAT_DISCARD; } if (xfs_has_reflink(mp)) { diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index e20e7c841489..82da583da17b 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -689,13 +689,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); struct zonefs_inode_info *zi = ZONEFS_I(inode); struct block_device *bdev = inode->i_sb->s_bdev; - unsigned int max; + unsigned int max = bdev_max_zone_append_sectors(bdev); struct bio *bio; ssize_t size; int nr_pages; ssize_t ret; - max = queue_max_zone_append_sectors(bdev_get_queue(bdev)); max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); iov_iter_truncate(from, max); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 87ce24d238f3..2bd073fa6bb5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,8 +17,6 @@ #include <linux/backing-dev-defs.h> #include <linux/slab.h> -struct blkcg; - static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); @@ -154,7 +152,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp); void wb_memcg_offline(struct mem_cgroup *memcg); -void wb_blkcg_offline(struct blkcg *blkcg); +void wb_blkcg_offline(struct cgroup_subsys_state *css); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode @@ -378,7 +376,7 @@ static inline void wb_memcg_offline(struct mem_cgroup *memcg) { } -static inline void wb_blkcg_offline(struct blkcg *blkcg) +static inline void wb_blkcg_offline(struct cgroup_subsys_state *css) { } diff --git a/include/linux/bio.h b/include/linux/bio.h index 00450fd86bb4..1cf3738ef1ea 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,9 +408,7 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask, struct bio_set *bs); -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, struct bio_set *bs); -struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); +struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, @@ -785,6 +783,12 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) bio->bi_opf |= REQ_NOWAIT; } +static inline void bio_clear_polled(struct bio *bio) +{ + /* can't support alloc cache if we turn off polling */ + bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE); +} + struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, unsigned int nr_pages, unsigned int opf, gfp_t gfp); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 652cd05b0924..9f40dbc65f82 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -14,265 +14,39 @@ * Nauman Rafique <nauman@google.com> */ -#include <linux/cgroup.h> -#include <linux/percpu.h> -#include <linux/percpu_counter.h> -#include <linux/u64_stats_sync.h> -#include <linux/seq_file.h> -#include <linux/radix-tree.h> -#include <linux/blkdev.h> -#include <linux/atomic.h> -#include <linux/kthread.h> -#include <linux/fs.h> +#include <linux/types.h> + +struct bio; +struct cgroup_subsys_state; +struct request_queue; #define FC_APPID_LEN 129 #ifdef CONFIG_BLK_CGROUP - -enum blkg_iostat_type { - BLKG_IOSTAT_READ, - BLKG_IOSTAT_WRITE, - BLKG_IOSTAT_DISCARD, - - BLKG_IOSTAT_NR, -}; - -struct blkcg_gq; -struct blkg_policy_data; - -struct blkcg { - struct cgroup_subsys_state css; - spinlock_t lock; - refcount_t online_pin; - - struct radix_tree_root blkg_tree; - struct blkcg_gq __rcu *blkg_hint; - struct hlist_head blkg_list; - - struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; - - struct list_head all_blkcgs_node; -#ifdef CONFIG_BLK_CGROUP_FC_APPID - char fc_app_id[FC_APPID_LEN]; -#endif -#ifdef CONFIG_CGROUP_WRITEBACK - struct list_head cgwb_list; -#endif -}; - -struct blkg_iostat { - u64 bytes[BLKG_IOSTAT_NR]; - u64 ios[BLKG_IOSTAT_NR]; -}; - -struct blkg_iostat_set { - struct u64_stats_sync sync; - struct blkg_iostat cur; - struct blkg_iostat last; -}; - -/* association between a blk cgroup and a request queue */ -struct blkcg_gq { - /* Pointer to the associated request_queue */ - struct request_queue *q; - struct list_head q_node; - struct hlist_node blkcg_node; - struct blkcg *blkcg; - - /* all non-root blkcg_gq's are guaranteed to have access to parent */ - struct blkcg_gq *parent; - - /* reference count */ - struct percpu_ref refcnt; - - /* is this blkg online? protected by both blkcg and q locks */ - bool online; - - struct blkg_iostat_set __percpu *iostat_cpu; - struct blkg_iostat_set iostat; - - struct blkg_policy_data *pd[BLKCG_MAX_POLS]; - - spinlock_t async_bio_lock; - struct bio_list async_bios; - union { - struct work_struct async_bio_work; - struct work_struct free_work; - }; - - atomic_t use_delay; - atomic64_t delay_nsec; - atomic64_t delay_start; - u64 last_delay; - int last_use; - - struct rcu_head rcu_head; -}; - extern struct cgroup_subsys_state * const blkcg_root_css; -void blkcg_destroy_blkgs(struct blkcg *blkcg); void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); void blkcg_maybe_throttle_current(void); - -static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct blkcg, css) : NULL; -} - -/** - * bio_blkcg - grab the blkcg associated with a bio - * @bio: target bio - * - * This returns the blkcg associated with a bio, %NULL if not associated. - * Callers are expected to either handle %NULL or know association has been - * done prior to calling this. - */ -static inline struct blkcg *bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return NULL; -} - -static inline bool blk_cgroup_congested(void) -{ - struct cgroup_subsys_state *css; - bool ret = false; - - rcu_read_lock(); - css = kthread_blkcg(); - if (!css) - css = task_css(current, io_cgrp_id); - while (css) { - if (atomic_read(&css->cgroup->congestion_count)) { - ret = true; - break; - } - css = css->parent; - } - rcu_read_unlock(); - return ret; -} - -/** - * blkcg_parent - get the parent of a blkcg - * @blkcg: blkcg of interest - * - * Return the parent blkcg of @blkcg. Can be called anytime. - */ -static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) -{ - return css_to_blkcg(blkcg->css.parent); -} - -/** - * blkcg_pin_online - pin online state - * @blkcg: blkcg of interest - * - * While pinned, a blkcg is kept online. This is primarily used to - * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline - * while an associated cgwb is still active. - */ -static inline void blkcg_pin_online(struct blkcg *blkcg) -{ - refcount_inc(&blkcg->online_pin); -} - -/** - * blkcg_unpin_online - unpin online state - * @blkcg: blkcg of interest - * - * This is primarily used to impedance-match blkg and cgwb lifetimes so - * that blkg doesn't go offline while an associated cgwb is still active. - * When this count goes to zero, all active cgwbs have finished so the - * blkcg can continue destruction by calling blkcg_destroy_blkgs(). - */ -static inline void blkcg_unpin_online(struct blkcg *blkcg) -{ - do { - if (!refcount_dec_and_test(&blkcg->online_pin)) - break; - blkcg_destroy_blkgs(blkcg); - blkcg = blkcg_parent(blkcg); - } while (blkcg); -} +bool blk_cgroup_congested(void); +void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css); +void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css); +struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css); +struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio); #else /* CONFIG_BLK_CGROUP */ -struct blkcg { -}; - -struct blkcg_gq { -}; - #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } - -#ifdef CONFIG_BLOCK static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } -static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } -#endif /* CONFIG_BLOCK */ - -#endif /* CONFIG_BLK_CGROUP */ - -#ifdef CONFIG_BLK_CGROUP_FC_APPID -/* - * Sets the fc_app_id field associted to blkcg - * @app_id: application identifier - * @cgrp_id: cgroup id - * @app_id_len: size of application identifier - */ -static inline int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len) -{ - struct cgroup *cgrp; - struct cgroup_subsys_state *css; - struct blkcg *blkcg; - int ret = 0; - - if (app_id_len > FC_APPID_LEN) - return -EINVAL; - - cgrp = cgroup_get_from_id(cgrp_id); - if (!cgrp) - return -ENOENT; - css = cgroup_get_e_css(cgrp, &io_cgrp_subsys); - if (!css) { - ret = -ENOENT; - goto out_cgrp_put; - } - blkcg = css_to_blkcg(css); - /* - * There is a slight race condition on setting the appid. - * Worst case an I/O may not find the right id. - * This is no different from the I/O we let pass while obtaining - * the vmid from the fabric. - * Adding the overhead of a lock is not necessary. - */ - strlcpy(blkcg->fc_app_id, app_id, app_id_len); - css_put(css); -out_cgrp_put: - cgroup_put(cgrp); - return ret; -} - -/** - * blkcg_get_fc_appid - get the fc app identifier associated with a bio - * @bio: target bio - * - * On success return the fc_app_id, on failure return NULL - */ -static inline char *blkcg_get_fc_appid(struct bio *bio) +static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio) { - if (bio && bio->bi_blkg && - (bio->bi_blkg->blkcg->fc_app_id[0] != '\0')) - return bio->bi_blkg->blkcg->fc_app_id; return NULL; } -#else -static inline int blkcg_set_fc_appid(char *buf, u64 id, size_t len) { return -EINVAL; } -static inline char *blkcg_get_fc_appid(struct bio *bio) { return NULL; } -#endif /*CONFIG_BLK_CGROUP_FC_APPID*/ +#endif /* CONFIG_BLK_CGROUP */ + +int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len); +char *blkcg_get_fc_appid(struct bio *bio); + #endif /* _BLK_CGROUP_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1973ef9bd40f..40e815400611 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -246,9 +246,8 @@ typedef unsigned int blk_qc_t; struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - unsigned int bi_opf; /* bottom bits req flags, - * top bits REQ_OP. Use - * accessors. + unsigned int bi_opf; /* bottom bits REQ_OP, top bits + * req_flags. */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; @@ -329,7 +328,6 @@ enum { BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ - BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ BIO_FLAG_LAST }; @@ -409,15 +407,17 @@ enum req_flag_bits { * work item to avoid such priority inversions. */ __REQ_CGROUP_PUNT, + __REQ_POLLED, /* caller polls for completion using bio_poll */ + __REQ_ALLOC_CACHE, /* allocate IO from cache if available */ + __REQ_SWAP, /* swap I/O */ + __REQ_DRV, /* for driver use */ - /* command specific flags for REQ_OP_WRITE_ZEROES: */ + /* + * Command specific flags, keep last: + */ + /* for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ - __REQ_POLLED, /* caller polls for completion using bio_poll */ - - /* for driver use */ - __REQ_DRV, - __REQ_SWAP, /* swapping request. */ __REQ_NR_BITS, /* stops here */ }; @@ -439,6 +439,7 @@ enum req_flag_bits { #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) #define REQ_POLLED (1ULL << __REQ_POLLED) +#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 60d016138997..34724b15813b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -248,6 +248,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; @@ -540,10 +541,8 @@ struct request_queue { #define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ -#define QUEUE_FLAG_DISCARD 8 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ -#define QUEUE_FLAG_SECERASE 11 /* supports secure erase */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ @@ -582,11 +581,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) -#define blk_queue_secure_erase(q) \ - (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) @@ -602,7 +598,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); REQ_FAILFAST_DRIVER)) #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) -#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) @@ -950,6 +945,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_discard_segments(struct request_queue *, unsigned short); +void blk_queue_max_secure_erase_sectors(struct request_queue *q, + unsigned int max_sectors); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); @@ -1090,13 +1087,12 @@ static inline long nr_blockdev_pages(void) extern void blk_io_schedule(void); -#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ - -extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int flags, - struct bio **biop); +int blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask); +int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct bio **biop); +int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp); #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ @@ -1115,7 +1111,7 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block, SECTOR_SHIFT), nr_blocks << (sb->s_blocksize_bits - SECTOR_SHIFT), - gfp_mask, flags); + gfp_mask); } static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask) @@ -1189,6 +1185,12 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu return min(l->max_zone_append_sectors, l->max_sectors); } +static inline unsigned int +bdev_max_zone_append_sectors(struct block_device *bdev) +{ + return queue_max_zone_append_sectors(bdev_get_queue(bdev)); +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { int retval = 512; @@ -1246,84 +1248,54 @@ bdev_zone_write_granularity(struct block_device *bdev) return queue_zone_write_granularity(bdev_get_queue(bdev)); } -static inline int queue_alignment_offset(const struct request_queue *q) -{ - if (q->limits.misaligned) - return -1; +int bdev_alignment_offset(struct block_device *bdev); +unsigned int bdev_discard_alignment(struct block_device *bdev); - return q->limits.alignment_offset; +static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev) +{ + return bdev_get_queue(bdev)->limits.max_discard_sectors; } -static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) +static inline unsigned int bdev_discard_granularity(struct block_device *bdev) { - unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) - << SECTOR_SHIFT; + return bdev_get_queue(bdev)->limits.discard_granularity; +} - return (granularity + lim->alignment_offset - alignment) % granularity; +static inline unsigned int +bdev_max_secure_erase_sectors(struct block_device *bdev) +{ + return bdev_get_queue(bdev)->limits.max_secure_erase_sectors; } -static inline int bdev_alignment_offset(struct block_device *bdev) +static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (q->limits.misaligned) - return -1; - if (bdev_is_partition(bdev)) - return queue_limit_alignment_offset(&q->limits, - bdev->bd_start_sect); - return q->limits.alignment_offset; + if (q) + return q->limits.max_write_zeroes_sectors; + + return 0; } -static inline int queue_discard_alignment(const struct request_queue *q) +static inline bool bdev_nonrot(struct block_device *bdev) { - if (q->limits.discard_misaligned) - return -1; - - return q->limits.discard_alignment; + return blk_queue_nonrot(bdev_get_queue(bdev)); } -static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) +static inline bool bdev_stable_writes(struct block_device *bdev) { - unsigned int alignment, granularity, offset; - - if (!lim->max_discard_sectors) - return 0; - - /* Why are these in bytes, not sectors? */ - alignment = lim->discard_alignment >> SECTOR_SHIFT; - granularity = lim->discard_granularity >> SECTOR_SHIFT; - if (!granularity) - return 0; - - /* Offset of the partition start in 'granularity' sectors */ - offset = sector_div(sector, granularity); - - /* And why do we do this modulus *again* in blkdev_issue_discard()? */ - offset = (granularity + alignment - offset) % granularity; - - /* Turn it back into bytes, gaah */ - return offset << SECTOR_SHIFT; + return test_bit(QUEUE_FLAG_STABLE_WRITES, + &bdev_get_queue(bdev)->queue_flags); } -static inline int bdev_discard_alignment(struct block_device *bdev) +static inline bool bdev_write_cache(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (bdev_is_partition(bdev)) - return queue_limit_discard_alignment(&q->limits, - bdev->bd_start_sect); - return q->limits.discard_alignment; + return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags); } -static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) +static inline bool bdev_fua(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_zeroes_sectors; - - return 0; + return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); } static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) @@ -1491,9 +1463,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, - unsigned int op); -void disk_end_io_acct(struct gendisk *disk, unsigned int op, +unsigned long bdev_start_io_acct(struct block_device *bdev, + unsigned int sectors, unsigned int op, + unsigned long start_time); +void bdev_end_io_acct(struct block_device *bdev, unsigned int op, unsigned long start_time); void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 22501a293fa5..623e22492afa 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -27,12 +27,10 @@ struct blk_trace { atomic_t dropped; }; -struct blkcg; - extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern __printf(3, 4) -void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *fmt, ...); +__printf(3, 4) void __blk_trace_note_message(struct blk_trace *bt, + struct cgroup_subsys_state *css, const char *fmt, ...); /** * blk_add_trace_msg - Add a (simple) message to the blktrace stream @@ -47,14 +45,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f * NOTE: Can not use 'static inline' due to presence of var args... * **/ -#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ +#define blk_add_cgroup_trace_msg(q, css, fmt, ...) \ do { \ struct blk_trace *bt; \ \ rcu_read_lock(); \ bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ - __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ + __blk_trace_note_message(bt, css, fmt, ##__VA_ARGS__);\ rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index de5d75bafd66..30e5bec81d2b 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -222,9 +222,5 @@ void kthread_associate_blkcg(struct cgroup_subsys_state *css); struct cgroup_subsys_state *kthread_blkcg(void); #else static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { } -static inline struct cgroup_subsys_state *kthread_blkcg(void) -{ - return NULL; -} #endif #endif /* _LINUX_KTHREAD_H */ diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 675f3a1fe613..773963a1e0b5 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -14,7 +14,7 @@ #define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 #define TRANSPORT_FLAG_PASSTHROUGH_PGR 0x4 -struct request_queue; +struct block_device; struct scatterlist; struct target_backend_ops { @@ -117,7 +117,7 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, bool target_sense_desc_format(struct se_device *dev); sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, - struct request_queue *q); + struct block_device *bdev); static inline bool target_dev_configured(struct se_device *se_dev) { diff --git a/kernel/kthread.c b/kernel/kthread.c index 50265f69a135..544fd4097406 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1522,5 +1522,4 @@ struct cgroup_subsys_state *kthread_blkcg(void) } return NULL; } -EXPORT_SYMBOL(kthread_blkcg); #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 4d5629196d01..10a32b0f2deb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -145,13 +145,14 @@ static void trace_note_time(struct blk_trace *bt) local_irq_restore(flags); } -void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg, - const char *fmt, ...) +void __blk_trace_note_message(struct blk_trace *bt, + struct cgroup_subsys_state *css, const char *fmt, ...) { int n; va_list args; unsigned long flags; char *buf; + u64 cgid = 0; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer_enabled)) @@ -170,17 +171,16 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg, n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); va_end(args); - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) - blkcg = NULL; #ifdef CONFIG_BLK_CGROUP - trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, - blkcg ? cgroup_id(blkcg->css.cgroup) : 1); -#else - trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0); + if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) + cgid = cgroup_id(css->cgroup); + else + cgid = 1; #endif + trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid); local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(__trace_note_message); +EXPORT_SYMBOL_GPL(__blk_trace_note_message); static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, pid_t pid) @@ -411,7 +411,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, return PTR_ERR(msg); bt = filp->private_data; - __trace_note_message(bt, NULL, "%s", msg); + __blk_trace_note_message(bt, NULL, "%s", msg); kfree(msg); return count; @@ -783,6 +783,7 @@ void blk_trace_shutdown(struct request_queue *q) #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { + struct cgroup_subsys_state *blkcg_css; struct blk_trace *bt; /* We don't use the 'bt' value here except as an optimization... */ @@ -790,9 +791,10 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; - if (!bio->bi_blkg) + blkcg_css = bio_blkcg_css(bio); + if (!blkcg_css) return 0; - return cgroup_id(bio_blkcg(bio)->css.cgroup); + return cgroup_id(blkcg_css->cgroup); } #else static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7176af65b103..ff60bd7d74e0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/blkdev.h> #include <linux/wait.h> #include <linux/rbtree.h> #include <linux/kthread.h> @@ -390,7 +391,6 @@ static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); - struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css); struct backing_dev_info *bdi = wb->bdi; mutex_lock(&wb->bdi->cgwb_release_mutex); @@ -401,7 +401,7 @@ static void cgwb_release_workfn(struct work_struct *work) mutex_unlock(&wb->bdi->cgwb_release_mutex); /* triggers blkg destruction if no online users left */ - blkcg_unpin_online(blkcg); + blkcg_unpin_online(wb->blkcg_css); fprop_local_destroy_percpu(&wb->memcg_completions); @@ -446,7 +446,6 @@ static int cgwb_create(struct backing_dev_info *bdi, { struct mem_cgroup *memcg; struct cgroup_subsys_state *blkcg_css; - struct blkcg *blkcg; struct list_head *memcg_cgwb_list, *blkcg_cgwb_list; struct bdi_writeback *wb; unsigned long flags; @@ -454,9 +453,8 @@ static int cgwb_create(struct backing_dev_info *bdi, memcg = mem_cgroup_from_css(memcg_css); blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); - blkcg = css_to_blkcg(blkcg_css); memcg_cgwb_list = &memcg->cgwb_list; - blkcg_cgwb_list = &blkcg->cgwb_list; + blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css); /* look up again under lock and discard on blkcg mismatch */ spin_lock_irqsave(&cgwb_lock, flags); @@ -511,7 +509,7 @@ static int cgwb_create(struct backing_dev_info *bdi, list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); - blkcg_pin_online(blkcg); + blkcg_pin_online(blkcg_css); css_get(memcg_css); css_get(blkcg_css); } @@ -724,18 +722,19 @@ void wb_memcg_offline(struct mem_cgroup *memcg) /** * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined - * @blkcg: blkcg being offlined + * @css: blkcg being offlined * * Also prevents creation of any new wb's associated with @blkcg. */ -void wb_blkcg_offline(struct blkcg *blkcg) +void wb_blkcg_offline(struct cgroup_subsys_state *css) { struct bdi_writeback *wb, *next; + struct list_head *list = blkcg_get_cgwb_list(css); spin_lock_irq(&cgwb_lock); - list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node) + list_for_each_entry_safe(wb, next, list, blkcg_node) cgwb_kill(wb); - blkcg->cgwb_list.next = NULL; /* prevent new wb's */ + list->next = NULL; /* prevent new wb's */ spin_unlock_irq(&cgwb_lock); } diff --git a/mm/page_io.c b/mm/page_io.c index 89fbf3cae30f..3fbdab6a940e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous) * attempt to access it in the page fault retry time check. */ if (synchronous) { - bio->bi_opf |= REQ_POLLED; get_task_struct(current); bio->bi_private = current; } @@ -372,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous) if (!READ_ONCE(bio->bi_private)) break; - if (!bio_poll(bio, NULL, 0)) - blk_io_schedule(); + blk_io_schedule(); } __set_current_state(TASK_RUNNING); bio_put(bio); diff --git a/mm/readahead.c b/mm/readahead.c index 4a60cdb64262..26bf74a6b2fe 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -113,6 +113,7 @@ * ->readpage() which may be less efficient. */ +#include <linux/blkdev.h> #include <linux/kernel.h> #include <linux/dax.h> #include <linux/gfp.h> diff --git a/mm/swapfile.c b/mm/swapfile.c index 63c61f8b2611..981a6e85c88e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -6,6 +6,7 @@ * Swap reorganised 29.12.95, Stephen Tweedie */ +#include <linux/blkdev.h> #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> @@ -179,7 +180,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, 0); + nr_blocks, GFP_KERNEL); if (err) return err; cond_resched(); @@ -190,7 +191,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, 0); + nr_blocks, GFP_KERNEL); if (err) break; @@ -254,7 +255,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_NOIO, 0)) + nr_blocks, GFP_NOIO)) break; se = next_se(se); @@ -2466,7 +2467,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if (p->flags & SWP_CONTINUED) free_swap_count_continuations(p); - if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev))) + if (!p->bdev || !bdev_nonrot(p->bdev)) atomic_dec(&nr_rotate_swap); mutex_lock(&swapon_mutex); @@ -2761,7 +2762,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ - if (blk_queue_is_zoned(p->bdev->bd_disk->queue)) + if (bdev_is_zoned(p->bdev)) return -EINVAL; p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { @@ -2957,20 +2958,6 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, return nr_extents; } -/* - * Helper to sys_swapon determining if a given swap - * backing device queue supports DISCARD operations. - */ -static bool swap_discardable(struct swap_info_struct *si) -{ - struct request_queue *q = bdev_get_queue(si->bdev); - - if (!blk_queue_discard(q)) - return false; - - return true; -} - SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) { struct swap_info_struct *p; @@ -3065,13 +3052,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap_unlock_inode; } - if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue)) + if (p->bdev && bdev_stable_writes(p->bdev)) p->flags |= SWP_STABLE_WRITES; if (p->bdev && p->bdev->bd_disk->fops->rw_page) p->flags |= SWP_SYNCHRONOUS_IO; - if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { + if (p->bdev && bdev_nonrot(p->bdev)) { int cpu; unsigned long ci, nr_cluster; @@ -3132,7 +3119,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) sizeof(long), GFP_KERNEL); - if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { + if ((swap_flags & SWAP_FLAG_DISCARD) && + p->bdev && bdev_max_discard_sectors(p->bdev)) { /* * When discard is enabled for swap with no particular * policy flagged, we set all swap discard flags here in |