diff options
author | Linus Torvalds | 2020-06-19 13:11:26 -0700 |
---|---|---|
committer | Linus Torvalds | 2020-06-19 13:11:26 -0700 |
commit | d2b1c81f5f6c3427bd900be5fed306ca83d649d4 (patch) | |
tree | b678c7fe0a28c205aca3b5a6fe6f7a9ff0bf7295 | |
parent | 592be758f196ed4610e326fc3f33dadd80aa7c6b (diff) | |
parent | 3373a3461aa15b7f9a871fa4cb2c9ef21ac20b47 (diff) |
Merge tag 'block-5.8-2020-06-19' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
- Use import_uuid() where appropriate (Andy)
- bcache fixes (Coly, Mauricio, Zhiqiang)
- blktrace sparse warnings fix (Jan)
- blktrace concurrent setup fix (Luis)
- blkdev_get use-after-free fix (Jason)
- Ensure all blk-mq maps are updated (Weiping)
- Loop invalidate bdev fix (Zheng)
* tag 'block-5.8-2020-06-19' of git://git.kernel.dk/linux-block:
block: make function 'kill_bdev' static
loop: replace kill_bdev with invalidate_bdev
partitions/ldm: Replace uuid_copy() with import_uuid() where it makes sense
block: update hctx map when use multiple maps
blktrace: Avoid sparse warnings when assigning q->blk_trace
blktrace: break out of blktrace setup on concurrent calls
block: Fix use-after-free in blkdev_get()
trace/events/block.h: drop kernel-doc for dropped function parameter
blk-mq: Remove redundant 'return' statement
bcache: pr_info() format clean up in bcache_device_init()
bcache: use delayed kworker fo asynchronous devices registration
bcache: check and adjust logical block size for backing devices
bcache: fix potential deadlock problem in btree_gc_coalesce
-rw-r--r-- | block/blk-mq-tag.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 4 | ||||
-rw-r--r-- | block/partitions/ldm.c | 2 | ||||
-rw-r--r-- | drivers/block/loop.c | 8 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 8 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 35 | ||||
-rw-r--r-- | fs/block_dev.c | 17 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/trace/events/block.h | 1 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 30 |
10 files changed, 70 insertions, 39 deletions
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 44f3d0967cb4..ae722f8b13fb 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -376,7 +376,7 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { - return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); + __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index 4f57d27bfa73..a9aa6d1e44cf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3479,7 +3479,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) nr_hw_queues = nr_cpu_ids; - if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues) + if (nr_hw_queues < 1) + return; + if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; list_for_each_entry(q, &set->tag_list, tag_set_list) diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index 6fdfcb40c537..d333786b5c7e 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -910,7 +910,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) return false; disk = &vb->vblk.disk; - uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name)); + import_uuid(&disk->disk_id, buffer + 0x18 + r_name); return true; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c33bbbfd1bd9..475e1a738560 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1368,14 +1368,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_sizelimit != info->lo_sizelimit) { size_changed = true; sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); + invalidate_bdev(lo->lo_device); } /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { - /* If any pages were dirtied after kill_bdev(), try again */ + /* If any pages were dirtied after invalidate_bdev(), try again */ err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, @@ -1615,11 +1615,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) return 0; sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); + invalidate_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); - /* kill_bdev should have truncated all the pages */ + /* invalidate_bdev should have truncated all the pages */ if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 39de94edd73a..6548a601edf0 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1389,7 +1389,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, if (__set_blocks(n1, n1->keys + n2->keys, block_bytes(b->c)) > btree_blocks(new_nodes[i])) - goto out_nocoalesce; + goto out_unlock_nocoalesce; keys = n2->keys; /* Take the key of the node we're getting rid of */ @@ -1418,7 +1418,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, if (__bch_keylist_realloc(&keylist, bkey_u64s(&new_nodes[i]->key))) - goto out_nocoalesce; + goto out_unlock_nocoalesce; bch_btree_node_write(new_nodes[i], &cl); bch_keylist_add(&keylist, &new_nodes[i]->key); @@ -1464,6 +1464,10 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, /* Invalidated our iterator */ return -EINTR; +out_unlock_nocoalesce: + for (i = 0; i < nodes; i++) + mutex_unlock(&new_nodes[i]->write_lock); + out_nocoalesce: closure_sync(&cl); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f9975c22bf7e..2014016f9a60 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -19,6 +19,7 @@ #include <linux/genhd.h> #include <linux/idr.h> #include <linux/kthread.h> +#include <linux/workqueue.h> #include <linux/module.h> #include <linux/random.h> #include <linux/reboot.h> @@ -819,7 +820,8 @@ static void bcache_device_free(struct bcache_device *d) } static int bcache_device_init(struct bcache_device *d, unsigned int block_size, - sector_t sectors, make_request_fn make_request_fn) + sector_t sectors, make_request_fn make_request_fn, + struct block_device *cached_bdev) { struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, @@ -885,6 +887,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, q->limits.io_min = block_size; q->limits.logical_block_size = block_size; q->limits.physical_block_size = block_size; + + if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) { + /* + * This should only happen with BCACHE_SB_VERSION_BDEV. + * Block/page size is checked for BCACHE_SB_VERSION_CDEV. + */ + pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n", + d->disk->disk_name, q->limits.logical_block_size, + PAGE_SIZE, bdev_logical_block_size(cached_bdev)); + + /* This also adjusts physical block size/min io size if needed */ + blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev)); + } + blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue); blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue); @@ -1340,7 +1356,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) ret = bcache_device_init(&dc->disk, block_size, dc->bdev->bd_part->nr_sects - dc->sb.data_offset, - cached_dev_make_request); + cached_dev_make_request, dc->bdev); if (ret) return ret; @@ -1453,7 +1469,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) kobject_init(&d->kobj, &bch_flash_dev_ktype); if (bcache_device_init(d, block_bytes(c), u->sectors, - flash_dev_make_request)) + flash_dev_make_request, NULL)) goto err; bcache_device_attach(d, c, u - c->uuids); @@ -2364,7 +2380,7 @@ static bool bch_is_open(struct block_device *bdev) } struct async_reg_args { - struct work_struct reg_work; + struct delayed_work reg_work; char *path; struct cache_sb *sb; struct cache_sb_disk *sb_disk; @@ -2375,7 +2391,7 @@ static void register_bdev_worker(struct work_struct *work) { int fail = false; struct async_reg_args *args = - container_of(work, struct async_reg_args, reg_work); + container_of(work, struct async_reg_args, reg_work.work); struct cached_dev *dc; dc = kzalloc(sizeof(*dc), GFP_KERNEL); @@ -2405,7 +2421,7 @@ static void register_cache_worker(struct work_struct *work) { int fail = false; struct async_reg_args *args = - container_of(work, struct async_reg_args, reg_work); + container_of(work, struct async_reg_args, reg_work.work); struct cache *ca; ca = kzalloc(sizeof(*ca), GFP_KERNEL); @@ -2433,11 +2449,12 @@ out: static void register_device_aync(struct async_reg_args *args) { if (SB_IS_BDEV(args->sb)) - INIT_WORK(&args->reg_work, register_bdev_worker); + INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker); else - INIT_WORK(&args->reg_work, register_cache_worker); + INIT_DELAYED_WORK(&args->reg_work, register_cache_worker); - queue_work(system_wq, &args->reg_work); + /* 10 jiffies is enough for a delay */ + queue_delayed_work(system_wq, &args->reg_work, 10); } static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, diff --git a/fs/block_dev.c b/fs/block_dev.c index 47860e589388..0ae656e022fd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -75,7 +75,7 @@ static void bdev_write_inode(struct block_device *bdev) } /* Kill _all_ buffers and pagecache , dirty or not.. */ -void kill_bdev(struct block_device *bdev) +static void kill_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; @@ -84,8 +84,7 @@ void kill_bdev(struct block_device *bdev) invalidate_bh_lrus(); truncate_inode_pages(mapping, 0); -} -EXPORT_SYMBOL(kill_bdev); +} /* Invalidate clean unused buffers and pagecache. */ void invalidate_bdev(struct block_device *bdev) @@ -1565,10 +1564,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) */ if (!for_part) { ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret != 0) { - bdput(bdev); + if (ret != 0) return ret; - } } restart: @@ -1637,8 +1634,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, mode, 1); - if (ret) + if (ret) { + bdput(whole); goto out_clear; + } bdev->bd_contains = whole; bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || @@ -1688,7 +1687,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_unblock_events(disk); put_disk_and_module(disk); out: - bdput(bdev); return ret; } @@ -1755,6 +1753,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) bdput(whole); } + if (res) + bdput(bdev); + return res; } EXPORT_SYMBOL(blkdev_get); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c4ab4dc1cd7..3f881a892ea7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2592,7 +2592,6 @@ extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); -extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern void emergency_thaw_bdev(struct super_block *sb); @@ -2608,7 +2607,6 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void kill_bdev(struct block_device *bdev) {} static inline void invalidate_bdev(struct block_device *bdev) {} static inline struct super_block *freeze_bdev(struct block_device *sb) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 1257f26bb887..93b114226af8 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -254,7 +254,6 @@ TRACE_EVENT(block_bio_bounce, * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed - * @error: io error value * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5773f0ba7e76..5ef0484513ec 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -3,6 +3,9 @@ * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/blktrace_api.h> @@ -344,7 +347,8 @@ static int __blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; - bt = xchg(&q->blk_trace, NULL); + bt = rcu_replace_pointer(q->blk_trace, NULL, + lockdep_is_held(&q->blk_trace_mutex)); if (!bt) return -EINVAL; @@ -494,6 +498,17 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, */ strreplace(buts->name, '/', '_'); + /* + * bdev can be NULL, as with scsi-generic, this is a helpful as + * we can be. + */ + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { + pr_warn("Concurrent blktraces are not allowed on %s\n", + buts->name); + return -EBUSY; + } + bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; @@ -543,10 +558,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - ret = -EBUSY; - if (cmpxchg(&q->blk_trace, NULL, bt)) - goto err; - + rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); ret = 0; @@ -1629,7 +1641,8 @@ static int blk_trace_remove_queue(struct request_queue *q) { struct blk_trace *bt; - bt = xchg(&q->blk_trace, NULL); + bt = rcu_replace_pointer(q->blk_trace, NULL, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -1661,10 +1674,7 @@ static int blk_trace_setup_queue(struct request_queue *q, blk_trace_setup_lba(bt, bdev); - ret = -EBUSY; - if (cmpxchg(&q->blk_trace, NULL, bt)) - goto free_bt; - + rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); return 0; |