diff options
author | Linus Torvalds | 2021-11-01 09:19:50 -0700 |
---|---|---|
committer | Linus Torvalds | 2021-11-01 09:19:50 -0700 |
commit | 33c8846c814c1c27c6e33af005042d15061f948b (patch) | |
tree | da7c105b61758094d1d55ec1326ff28b521dbe9e /fs | |
parent | 9ac211426fb6747c92d570647e2ce889e33cbffd (diff) | |
parent | 9b84c629c90374498ab5825dede74a06ea1c775b (diff) |
Merge tag 'for-5.16/block-2021-10-29' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
- mq-deadline accounting improvements (Bart)
- blk-wbt timer fix (Andrea)
- Untangle the block layer includes (Christoph)
- Rework the poll support to be bio based, which will enable adding
support for polling for bio based drivers (Christoph)
- Block layer core support for multi-actuator drives (Damien)
- blk-crypto improvements (Eric)
- Batched tag allocation support (me)
- Request completion batching support (me)
- Plugging improvements (me)
- Shared tag set improvements (John)
- Concurrent queue quiesce support (Ming)
- Cache bdev in ->private_data for block devices (Pavel)
- bdev dio improvements (Pavel)
- Block device invalidation and block size improvements (Xie)
- Various cleanups, fixes, and improvements (Christoph, Jackie,
Masahira, Tejun, Yu, Pavel, Zheng, me)
* tag 'for-5.16/block-2021-10-29' of git://git.kernel.dk/linux-block: (174 commits)
blk-mq-debugfs: Show active requests per queue for shared tags
block: improve readability of blk_mq_end_request_batch()
virtio-blk: Use blk_validate_block_size() to validate block size
loop: Use blk_validate_block_size() to validate block size
nbd: Use blk_validate_block_size() to validate block size
block: Add a helper to validate the block size
block: re-flow blk_mq_rq_ctx_init()
block: prefetch request to be initialized
block: pass in blk_mq_tags to blk_mq_rq_ctx_init()
block: add rq_flags to struct blk_mq_alloc_data
block: add async version of bio_set_polled
block: kill DIO_MULTI_BIO
block: kill unused polling bits in __blkdev_direct_IO()
block: avoid extra iter advance with async iocb
block: Add independent access ranges support
blk-mq: don't issue request directly in case that current is to be blocked
sbitmap: silence data race warning
blk-cgroup: synchronize blkg creation against policy deactivation
block: refactor bio_iov_bvec_set()
block: add single bio async direct IO helper
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/compression.c | 1 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 1 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 9 | ||||
-rw-r--r-- | fs/direct-io.c | 14 | ||||
-rw-r--r-- | fs/ext4/file.c | 2 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 1 | ||||
-rw-r--r-- | fs/fs-writeback.c | 5 | ||||
-rw-r--r-- | fs/gfs2/file.c | 4 | ||||
-rw-r--r-- | fs/io_uring.c | 24 | ||||
-rw-r--r-- | fs/iomap/direct-io.c | 57 | ||||
-rw-r--r-- | fs/ntfs/file.c | 1 | ||||
-rw-r--r-- | fs/ntfs3/file.c | 1 | ||||
-rw-r--r-- | fs/orangefs/inode.c | 2 | ||||
-rw-r--r-- | fs/orangefs/super.c | 1 | ||||
-rw-r--r-- | fs/quota/quota.c | 1 | ||||
-rw-r--r-- | fs/ramfs/inode.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 2 | ||||
-rw-r--r-- | fs/zonefs/super.c | 2 |
18 files changed, 68 insertions, 61 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 0913ee50e6c3..6c7eb80220ca 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/highmem.h> +#include <linux/kthread.h> #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 84627cbd5b5b..66290b214f2b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -7,6 +7,7 @@ #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/mm.h> +#include <linux/error-injection.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c096ab9bb5e..954b53a90f04 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6,6 +6,7 @@ #include <crypto/hash.h> #include <linux/kernel.h> #include <linux/bio.h> +#include <linux/blk-cgroup.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/pagemap.h> @@ -8248,7 +8249,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, return dip; } -static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, +static void btrfs_submit_direct(const struct iomap_iter *iter, struct bio *dio_bio, loff_t file_offset) { struct inode *inode = iter->inode; @@ -8278,7 +8279,7 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, } dio_bio->bi_status = BLK_STS_RESOURCE; bio_endio(dio_bio); - return BLK_QC_T_NONE; + return; } if (!write) { @@ -8372,15 +8373,13 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, free_extent_map(em); } while (submit_len > 0); - return BLK_QC_T_NONE; + return; out_err_em: free_extent_map(em); out_err: dip->dio_bio->bi_status = status; btrfs_dio_private_put(dip); - - return BLK_QC_T_NONE; } const struct iomap_ops btrfs_dio_iomap_ops = { diff --git a/fs/direct-io.c b/fs/direct-io.c index b2e86e739d7a..453dcff0e7f5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -119,7 +119,6 @@ struct dio { int flags; /* doesn't change */ int op; int op_flags; - blk_qc_t bio_cookie; struct gendisk *bio_disk; struct inode *inode; loff_t i_size; /* i_size when submitted */ @@ -438,11 +437,10 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) dio->bio_disk = bio->bi_bdev->bd_disk; - if (sdio->submit_io) { + if (sdio->submit_io) sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio); - dio->bio_cookie = BLK_QC_T_NONE; - } else - dio->bio_cookie = submit_bio(bio); + else + submit_bio(bio); sdio->bio = NULL; sdio->boundary = 0; @@ -481,9 +479,7 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) - blk_io_schedule(); + blk_io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; @@ -1214,8 +1210,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } else { dio->op = REQ_OP_READ; } - if (iocb->ki_flags & IOCB_HIPRI) - dio->op_flags |= REQ_HIPRI; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac0e11bbb445..9c5559faacda 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -915,7 +915,7 @@ const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, .read_iter = ext4_file_read_iter, .write_iter = ext4_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1bf9ad4c220..20a083dc9042 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -7,6 +7,7 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> +#include <linux/moduleparam.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/lzo.h> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 81ec192ce067..4124a89a1a5d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1893,7 +1893,8 @@ static long writeback_sb_inodes(struct super_block *sb, * unplug, so get our IOs out the door before we * give up the CPU. */ - blk_flush_plug(current); + if (current->plug) + blk_flush_plug(current->plug, false); cond_resched(); } @@ -2291,7 +2292,7 @@ void wakeup_flusher_threads(enum wb_reason reason) * If we are expecting writeback progress we must submit plugged IO. */ if (blk_needs_flush_plug(current)) - blk_schedule_flush_plug(current); + blk_flush_plug(current->plug, true); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 078ef29e31bc..5436a688157a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1351,7 +1351,7 @@ const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, @@ -1384,7 +1384,7 @@ const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, diff --git a/fs/io_uring.c b/fs/io_uring.c index 3a098b473401..057d07cee9f8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2454,14 +2454,16 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { struct io_kiocb *req, *tmp; + unsigned int poll_flags = BLK_POLL_NOSLEEP; + DEFINE_IO_COMP_BATCH(iob); LIST_HEAD(done); - bool spin; /* * Only spin for completions if we don't have multiple devices hanging * off our complete list, and we're under the requested amount. */ - spin = !ctx->poll_multi_queue && *nr_events < min; + if (ctx->poll_multi_queue || *nr_events >= min) + poll_flags |= BLK_POLL_ONESHOT; list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { struct kiocb *kiocb = &req->rw.kiocb; @@ -2479,17 +2481,20 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) break; - ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); + ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags); if (unlikely(ret < 0)) return ret; else if (ret) - spin = false; + poll_flags |= BLK_POLL_ONESHOT; /* iopoll may have completed current req */ - if (READ_ONCE(req->iopoll_completed)) + if (!rq_list_empty(iob.req_list) || + READ_ONCE(req->iopoll_completed)) list_move_tail(&req->inflight_entry, &done); } + if (!rq_list_empty(iob.req_list)) + iob.complete(&iob); if (!list_empty(&done)) io_iopoll_complete(ctx, nr_events, &done); @@ -2735,19 +2740,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req) ctx->poll_multi_queue = false; } else if (!ctx->poll_multi_queue) { struct io_kiocb *list_req; - unsigned int queue_num0, queue_num1; list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, inflight_entry); - if (list_req->file != req->file) { + if (list_req->file != req->file) ctx->poll_multi_queue = true; - } else { - queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); - queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); - if (queue_num0 != queue_num1) - ctx->poll_multi_queue = true; - } } /* diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 4ecd255e0511..83ecfba53abe 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -38,8 +38,7 @@ struct iomap_dio { struct { struct iov_iter *iter; struct task_struct *waiter; - struct request_queue *last_queue; - blk_qc_t cookie; + struct bio *poll_bio; } submit; /* used for aio completion: */ @@ -49,29 +48,20 @@ struct iomap_dio { }; }; -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin) -{ - struct request_queue *q = READ_ONCE(kiocb->private); - - if (!q) - return 0; - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), spin); -} -EXPORT_SYMBOL_GPL(iomap_dio_iopoll); - static void iomap_dio_submit_bio(const struct iomap_iter *iter, struct iomap_dio *dio, struct bio *bio, loff_t pos) { atomic_inc(&dio->ref); - if (dio->iocb->ki_flags & IOCB_HIPRI) + if (dio->iocb->ki_flags & IOCB_HIPRI) { bio_set_polled(bio, dio->iocb); + dio->submit.poll_bio = bio; + } - dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev); if (dio->dops && dio->dops->submit_io) - dio->submit.cookie = dio->dops->submit_io(iter, bio, pos); + dio->dops->submit_io(iter, bio, pos); else - dio->submit.cookie = submit_bio(bio); + submit_bio(bio); } ssize_t iomap_dio_complete(struct iomap_dio *dio) @@ -164,9 +154,11 @@ static void iomap_dio_bio_end_io(struct bio *bio) } else if (dio->flags & IOMAP_DIO_WRITE) { struct inode *inode = file_inode(dio->iocb->ki_filp); + WRITE_ONCE(dio->iocb->private, NULL); INIT_WORK(&dio->aio.work, iomap_dio_complete_work); queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); } else { + WRITE_ONCE(dio->iocb->private, NULL); iomap_dio_complete_work(&dio->aio.work); } } @@ -282,6 +274,13 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, if (!iov_iter_count(dio->submit.iter)) goto out; + /* + * We can only poll for single bio I/Os. + */ + if (need_zeroout || + ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) + dio->iocb->ki_flags &= ~IOCB_HIPRI; + if (need_zeroout) { /* zero out from the start of the block to the write offset */ pad = pos & (fs_block_size - 1); @@ -339,6 +338,11 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); + /* + * We can only poll for single bio I/Os. + */ + if (nr_pages) + dio->iocb->ki_flags &= ~IOCB_HIPRI; iomap_dio_submit_bio(iter, dio, bio, pos); pos += n; } while (nr_pages); @@ -485,8 +489,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->submit.iter = iter; dio->submit.waiter = current; - dio->submit.cookie = BLK_QC_T_NONE; - dio->submit.last_queue = NULL; + dio->submit.poll_bio = NULL; if (iov_iter_rw(iter) == READ) { if (iomi.pos >= dio->i_size) @@ -565,8 +568,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, inode_dio_begin(inode); blk_start_plug(&plug); - while ((ret = iomap_iter(&iomi, ops)) > 0) + while ((ret = iomap_iter(&iomi, ops)) > 0) { iomi.processed = iomap_dio_iter(&iomi, dio); + + /* + * We can only poll for single bio I/Os. + */ + iocb->ki_flags &= ~IOCB_HIPRI; + } + blk_finish_plug(&plug); /* @@ -592,8 +602,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (dio->flags & IOMAP_DIO_WRITE_FUA) dio->flags &= ~IOMAP_DIO_NEED_SYNC; - WRITE_ONCE(iocb->ki_cookie, dio->submit.cookie); - WRITE_ONCE(iocb->private, dio->submit.last_queue); + WRITE_ONCE(iocb->private, dio->submit.poll_bio); /* * We are about to drop our additional submission reference, which @@ -620,10 +629,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->submit.waiter)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || - !dio->submit.last_queue || - !blk_poll(dio->submit.last_queue, - dio->submit.cookie, true)) + if (!dio->submit.poll_bio || + !bio_poll(dio->submit.poll_bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ab4f3362466d..373dbb627657 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -5,6 +5,7 @@ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. */ +#include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/buffer_head.h> #include <linux/gfp.h> diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 43b1451bff53..a3cd3c3f091e 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -8,6 +8,7 @@ */ #include <linux/backing-dev.h> +#include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/compat.h> #include <linux/falloc.h> diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index c1bb4c4b5d67..e5e3e500ed46 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -10,7 +10,7 @@ * Linux VFS inode operations. */ -#include <linux/bvec.h> +#include <linux/blkdev.h> #include <linux/fileattr.h> #include "protocol.h" #include "orangefs-kernel.h" diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 2f2e430461b2..8bb0a53a658b 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -11,6 +11,7 @@ #include <linux/parser.h> #include <linux/hashtable.h> +#include <linux/seq_file.h> /* a cache for orangefs-inode objects (i.e. orangefs inode private data) */ static struct kmem_cache *orangefs_inode_cache; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2bcc9a6f1bfc..052f143e2e0e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -10,6 +10,7 @@ #include <linux/namei.h> #include <linux/slab.h> #include <asm/current.h> +#include <linux/blkdev.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/security.h> diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 65e7e56005b8..e2302342a67f 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -38,6 +38,7 @@ #include <linux/uaccess.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> +#include <linux/seq_file.h> #include "internal.h" struct ramfs_mount_opts { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7aa943edfc02..62e7fbe4e54c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1452,7 +1452,7 @@ const struct file_operations xfs_file_operations = { .write_iter = xfs_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index ddc346a9df9b..3ce5f47338cb 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1128,7 +1128,7 @@ static const struct file_operations zonefs_file_operations = { .write_iter = zonefs_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, }; static struct kmem_cache *zonefs_inode_cachep; |