aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorLinus Torvalds2020-06-02 15:37:03 -0700
committerLinus Torvalds2020-06-02 15:37:03 -0700
commitbce159d734091fe31340976081577333f52a85e4 (patch)
tree8396be51e6703797a60aefb4992e729f327d27c2 /drivers/md
parent750a02ab8d3c49ca7d23102be90d3d1db19e2827 (diff)
parent0c8d3fceade2ab1bbac68bca013e62bfdb851d19 (diff)
Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: "On top of the core changes, here are the block driver changes for this merge window: - NVMe changes: - NVMe over Fibre Channel protocol updates, which also reach over to drivers/scsi/lpfc (James Smart) - namespace revalidation support on the target (Anthony Iliopoulos) - gcc zero length array fix (Arnd Bergmann) - nvmet cleanups (Chaitanya Kulkarni) - misc cleanups and fixes (me, Keith Busch, Sagi Grimberg) - use a SRQ per completion vector (Max Gurtovoy) - fix handling of runtime changes to the queue count (Weiping Zhang) - t10 protection information support for nvme-rdma and nvmet-rdma (Israel Rukshin and Max Gurtovoy) - target side AEN improvements (Chaitanya Kulkarni) - various fixes and minor improvements all over, icluding the nvme part of the lpfc driver" - Floppy code cleanup series (Willy, Denis) - Floppy contention fix (Jiri) - Loop CONFIGURE support (Martijn) - bcache fixes/improvements (Coly, Joe, Colin) - q->queuedata cleanups (Christoph) - Get rid of ioctl_by_bdev (Christoph, Stefan) - md/raid5 allocation fixes (Coly) - zero length array fixes (Gustavo) - swim3 task state fix (Xu)" * tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits) bcache: configure the asynchronous registertion to be experimental bcache: asynchronous devices registration bcache: fix refcount underflow in bcache_device_free() bcache: Convert pr_<level> uses to a more typical style bcache: remove redundant variables i and n lpfc: Fix return value in __lpfc_nvme_ls_abort lpfc: fix axchg pointer reference after free and double frees lpfc: Fix pointer checks and comments in LS receive refactoring nvme: set dma alignment to qword nvmet: cleanups the loop in nvmet_async_events_process nvmet: fix memory leak when removing namespaces and controllers concurrently nvmet-rdma: add metadata/T10-PI support nvmet: add metadata support for block devices nvmet: add metadata/T10-PI support nvme: add Metadata Capabilities enumerations nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len nvmet: rename nvmet_rw_len to nvmet_rw_data_len nvmet: add metadata characteristics for a namespace nvme-rdma: add metadata/T10-PI support nvme-rdma: introduce nvme_rdma_sgl structure ...
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/Kconfig9
-rw-r--r--drivers/md/bcache/bcache.h2
-rw-r--r--drivers/md/bcache/bset.c6
-rw-r--r--drivers/md/bcache/btree.c16
-rw-r--r--drivers/md/bcache/extents.c12
-rw-r--r--drivers/md/bcache/io.c8
-rw-r--r--drivers/md/bcache/journal.c34
-rw-r--r--drivers/md/bcache/request.c6
-rw-r--r--drivers/md/bcache/super.c232
-rw-r--r--drivers/md/bcache/sysfs.c8
-rw-r--r--drivers/md/bcache/writeback.c6
-rw-r--r--drivers/md/md-linear.h2
-rw-r--r--drivers/md/md.c71
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid1.h2
-rw-r--r--drivers/md/raid10.h2
-rw-r--r--drivers/md/raid5.c22
18 files changed, 299 insertions, 153 deletions
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 6dfa653d30db..bf7dd96db9b3 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -26,3 +26,12 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
+
+config BCACHE_ASYNC_REGISTRAION
+ bool "Asynchronous device registration (EXPERIMENTAL)"
+ depends on BCACHE
+ help
+ Add a sysfs file /sys/fs/bcache/register_async. Writing registering
+ device path into this file will returns immediately and the real
+ registration work is handled in kernel work queue in asynchronous
+ way.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 74a9849ea164..221e0191b687 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -176,7 +176,7 @@
* - updates to non leaf nodes just happen synchronously (see btree_split()).
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include <linux/bcache.h>
#include <linux/bio.h>
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 4385303836d8..4995fcaefe29 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -6,7 +6,7 @@
* Copyright 2012 Google, Inc.
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include "util.h"
#include "bset.h"
@@ -31,7 +31,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
if (b->ops->key_dump)
b->ops->key_dump(b, k);
else
- pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+ pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
if (next < bset_bkey_last(i) &&
bkey_cmp(k, b->ops->is_extents ?
@@ -1225,7 +1225,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
- pr_debug("sorted %i keys", out->keys);
+ pr_debug("sorted %i keys\n", out->keys);
}
static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 72856e5f23a3..39de94edd73a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -619,7 +619,7 @@ retry:
* and BTREE_NODE_journal_flush bit cleared by btree_flush_write().
*/
if (btree_node_journal_flush(b)) {
- pr_debug("bnode %p is flushing by journal, retry", b);
+ pr_debug("bnode %p is flushing by journal, retry\n", b);
mutex_unlock(&b->write_lock);
udelay(1);
goto retry;
@@ -802,7 +802,7 @@ int bch_btree_cache_alloc(struct cache_set *c)
c->shrink.batch = c->btree_pages * 2;
if (register_shrinker(&c->shrink))
- pr_warn("bcache: %s: could not register shrinker",
+ pr_warn("bcache: %s: could not register shrinker\n",
__func__);
return 0;
@@ -1054,7 +1054,7 @@ retry:
*/
if (btree_node_journal_flush(b)) {
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p journal_flush set, retry", b);
+ pr_debug("bnode %p journal_flush set, retry\n", b);
udelay(1);
goto retry;
}
@@ -1798,7 +1798,7 @@ static void bch_btree_gc(struct cache_set *c)
schedule_timeout_interruptible(msecs_to_jiffies
(GC_SLEEP_MS));
else if (ret)
- pr_warn("gc failed!");
+ pr_warn("gc failed!\n");
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
@@ -1907,10 +1907,8 @@ static int bch_btree_check_thread(void *arg)
struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
- int i, n;
k = p = NULL;
- i = n = 0;
cur_idx = prev_idx = 0;
ret = 0;
@@ -2045,7 +2043,7 @@ int bch_btree_check(struct cache_set *c)
&check_state->infos[i],
name);
if (IS_ERR(check_state->infos[i].thread)) {
- pr_err("fails to run thread bch_btrchk[%d]", i);
+ pr_err("fails to run thread bch_btrchk[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(check_state->infos[i].thread);
ret = -ENOMEM;
@@ -2456,7 +2454,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
if (ret) {
struct bkey *k;
- pr_err("error %i", ret);
+ pr_err("error %i\n", ret);
while ((k = bch_keylist_pop(keys)))
bkey_put(c, k);
@@ -2744,7 +2742,7 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
break;
if (bkey_cmp(&buf->last_scanned, end) >= 0) {
- pr_debug("scan finished");
+ pr_debug("scan finished\n");
break;
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 886710043025..9162af5bb6ec 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -130,18 +130,18 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
char buf[80];
bch_extent_to_text(buf, sizeof(buf), k);
- pr_err(" %s", buf);
+ pr_cont(" %s", buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
- pr_err(" bucket %zu", n);
+ pr_cont(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
- pr_err(" prio %i",
- PTR_BUCKET(b->c, k, j)->prio);
+ pr_cont(" prio %i",
+ PTR_BUCKET(b->c, k, j)->prio);
}
- pr_err(" %s\n", bch_ptr_status(b->c, k));
+ pr_cont(" %s\n", bch_ptr_status(b->c, k));
}
/* Btree ptrs */
@@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
if (stale && KEY_DIRTY(k)) {
bch_extent_to_text(buf, sizeof(buf), k);
- pr_info("stale dirty pointer, stale %u, key: %s",
+ pr_info("stale dirty pointer, stale %u, key: %s\n",
stale, buf);
}
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 4d93f07f63e5..b25ee33b0d0b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -65,14 +65,14 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
* we shouldn't count failed REQ_RAHEAD bio to dc->io_errors.
*/
if (bio->bi_opf & REQ_RAHEAD) {
- pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore",
+ pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
dc->backing_dev_name);
return;
}
errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
- pr_err("%s: IO error on backing device, unrecoverable",
+ pr_err("%s: IO error on backing device, unrecoverable\n",
dc->backing_dev_name);
else
bch_cached_dev_error(dc);
@@ -123,12 +123,12 @@ void bch_count_io_errors(struct cache *ca,
errors >>= IO_ERROR_SHIFT;
if (errors < ca->set->error_limit)
- pr_err("%s: IO error on %s%s",
+ pr_err("%s: IO error on %s%s\n",
ca->cache_dev_name, m,
is_read ? ", recovering." : ".");
else
bch_cache_set_error(ca->set,
- "%s: too many IO errors %s",
+ "%s: too many IO errors %s\n",
ca->cache_dev_name, m);
}
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 0e3ff9745ac7..90aac4e2333f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -47,7 +47,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
closure_init_stack(&cl);
- pr_debug("reading %u", bucket_index);
+ pr_debug("reading %u\n", bucket_index);
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
@@ -78,13 +78,13 @@ reread: left = ca->sb.bucket_size - offset;
size_t blocks, bytes = set_bytes(j);
if (j->magic != jset_magic(&ca->sb)) {
- pr_debug("%u: bad magic", bucket_index);
+ pr_debug("%u: bad magic\n", bucket_index);
return ret;
}
if (bytes > left << 9 ||
bytes > PAGE_SIZE << JSET_BITS) {
- pr_info("%u: too big, %zu bytes, offset %u",
+ pr_info("%u: too big, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@@ -93,7 +93,7 @@ reread: left = ca->sb.bucket_size - offset;
goto reread;
if (j->csum != csum_set(j)) {
- pr_info("%u: bad csum, %zu bytes, offset %u",
+ pr_info("%u: bad csum, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@@ -190,7 +190,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
uint64_t seq;
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
- pr_debug("%u journal buckets", ca->sb.njournal_buckets);
+ pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
/*
* Read journal buckets ordered by golden ratio hash to quickly
@@ -215,7 +215,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
* If that fails, check all the buckets we haven't checked
* already
*/
- pr_debug("falling back to linear search");
+ pr_debug("falling back to linear search\n");
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
@@ -233,7 +233,7 @@ bsearch:
/* Binary search */
m = l;
r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
- pr_debug("starting binary search, l %u r %u", l, r);
+ pr_debug("starting binary search, l %u r %u\n", l, r);
while (l + 1 < r) {
seq = list_entry(list->prev, struct journal_replay,
@@ -253,7 +253,7 @@ bsearch:
* Read buckets in reverse order until we stop finding more
* journal entries
*/
- pr_debug("finishing up: m %u njournal_buckets %u",
+ pr_debug("finishing up: m %u njournal_buckets %u\n",
m, ca->sb.njournal_buckets);
l = m;
@@ -370,10 +370,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
if (n != i->j.seq) {
if (n == start && is_discard_enabled(s))
- pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)",
+ pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
else {
- pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
+ pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
ret = -EIO;
goto err;
@@ -403,7 +403,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
entries++;
}
- pr_info("journal replay done, %i keys in %i entries, seq %llu",
+ pr_info("journal replay done, %i keys in %i entries, seq %llu\n",
keys, entries, end);
err:
while (!list_empty(list)) {
@@ -481,7 +481,7 @@ static void btree_flush_write(struct cache_set *c)
break;
if (btree_node_journal_flush(b))
- pr_err("BUG: flush_write bit should not be set here!");
+ pr_err("BUG: flush_write bit should not be set here!\n");
mutex_lock(&b->write_lock);
@@ -534,13 +534,13 @@ static void btree_flush_write(struct cache_set *c)
for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
- pr_err("BUG: btree_nodes[%d] is NULL", i);
+ pr_err("BUG: btree_nodes[%d] is NULL\n", i);
continue;
}
/* safe to check without holding b->write_lock */
if (!btree_node_journal_flush(b)) {
- pr_err("BUG: bnode %p: journal_flush bit cleaned", b);
+ pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b);
continue;
}
@@ -548,14 +548,14 @@ static void btree_flush_write(struct cache_set *c)
if (!btree_current_write(b)->journal) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p: written by others", b);
+ pr_debug("bnode %p: written by others\n", b);
continue;
}
if (!btree_node_dirty(b)) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p: dirty bit cleaned by others", b);
+ pr_debug("bnode %p: dirty bit cleaned by others\n", b);
continue;
}
@@ -716,7 +716,7 @@ void bch_journal_next(struct journal *j)
j->cur->data->keys = 0;
if (fifo_full(&j->pin))
- pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
+ pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin));
}
static void journal_write_endio(struct bio *bio)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 22b483527176..7acf024e99f3 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -110,7 +110,7 @@ static void bch_data_invalidate(struct closure *cl)
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio;
- pr_debug("invalidating %i sectors from %llu",
+ pr_debug("invalidating %i sectors from %llu\n",
bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
@@ -396,7 +396,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
- pr_debug("skipping unaligned io");
+ pr_debug("skipping unaligned io\n");
goto skip;
}
@@ -650,7 +650,7 @@ static void backing_request_endio(struct bio *bio)
*/
if (unlikely(s->iop.writeback &&
bio->bi_opf & REQ_PREFLUSH)) {
- pr_err("Can't flush %s: returned bi_status %i",
+ pr_err("Can't flush %s: returned bi_status %i\n",
dc->backing_dev_name, bio->bi_status);
} else {
/* set to orig_bio->bi_status in bio_complete() */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index d98354fa28e3..f9975c22bf7e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -89,7 +89,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
sb->d[i] = le64_to_cpu(s->d[i]);
- pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
+ pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
sb->version, sb->flags, sb->seq, sb->keys);
err = "Not a bcache superblock (bad offset)";
@@ -234,7 +234,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
out->csum = csum_set(out);
- pr_debug("ver %llu, flags %llu, seq %llu",
+ pr_debug("ver %llu, flags %llu, seq %llu\n",
sb->version, sb->flags, sb->seq);
submit_bio(bio);
@@ -365,11 +365,11 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
}
bch_extent_to_text(buf, sizeof(buf), k);
- pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);
+ pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);
for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
if (!bch_is_zero(u->uuid, 16))
- pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u",
+ pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
u - c->uuids, u->uuid, u->label,
u->first_reg, u->last_reg, u->invalidated);
@@ -534,7 +534,7 @@ int bch_prio_write(struct cache *ca, bool wait)
struct bucket *b;
struct closure cl;
- pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
fifo_used(&ca->free[RESERVE_PRIO]),
fifo_used(&ca->free[RESERVE_NONE]),
fifo_used(&ca->free_inc));
@@ -629,12 +629,12 @@ static int prio_read(struct cache *ca, uint64_t bucket)
if (p->csum !=
bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
- pr_warn("bad csum reading priorities");
+ pr_warn("bad csum reading priorities\n");
goto out;
}
if (p->magic != pset_magic(&ca->sb)) {
- pr_warn("bad magic reading priorities");
+ pr_warn("bad magic reading priorities\n");
goto out;
}
@@ -728,11 +728,11 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
if (ret < 0)
- pr_err("Couldn't create device -> cache set symlink");
+ pr_err("Couldn't create device -> cache set symlink\n");
ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
if (ret < 0)
- pr_err("Couldn't create cache set -> device symlink");
+ pr_err("Couldn't create cache set -> device symlink\n");
clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
@@ -789,15 +789,17 @@ static void bcache_device_free(struct bcache_device *d)
lockdep_assert_held(&bch_register_lock);
if (disk)
- pr_info("%s stopped", disk->disk_name);
+ pr_info("%s stopped\n", disk->disk_name);
else
- pr_err("bcache device (NULL gendisk) stopped");
+ pr_err("bcache device (NULL gendisk) stopped\n");
if (d->c)
bcache_device_detach(d);
if (disk) {
- if (disk->flags & GENHD_FL_UP)
+ bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
+
+ if (disk_added)
del_gendisk(disk);
if (disk->queue)
@@ -805,7 +807,8 @@ static void bcache_device_free(struct bcache_device *d)
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
- put_disk(disk);
+ if (disk_added)
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -830,7 +833,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
- pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
+ pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n",
(unsigned int)d->nr_stripes);
return -ENOMEM;
}
@@ -928,11 +931,11 @@ static int cached_dev_status_update(void *arg)
dc->offline_seconds = 0;
if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
- pr_err("%s: device offline for %d seconds",
+ pr_err("%s: device offline for %d seconds\n",
dc->backing_dev_name,
BACKING_DEV_OFFLINE_TIMEOUT);
- pr_err("%s: disable I/O request due to backing "
- "device offline", dc->disk.name);
+ pr_err("%s: disable I/O request due to backing device offline\n",
+ dc->disk.name);
dc->io_disable = true;
/* let others know earlier that io_disable is true */
smp_mb();
@@ -959,7 +962,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
};
if (dc->io_disable) {
- pr_err("I/O disabled on cached dev %s",
+ pr_err("I/O disabled on cached dev %s\n",
dc->backing_dev_name);
kfree(env[1]);
kfree(env[2]);
@@ -971,7 +974,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
kfree(env[1]);
kfree(env[2]);
kfree(buf);
- pr_info("cached dev %s is running already",
+ pr_info("cached dev %s is running already\n",
dc->backing_dev_name);
return -EBUSY;
}
@@ -1001,16 +1004,14 @@ int bch_cached_dev_run(struct cached_dev *dc)
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj,
&d->kobj, "bcache")) {
- pr_err("Couldn't create bcache dev <-> disk sysfs symlinks");
+ pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
return -ENOMEM;
}
dc->status_update_thread = kthread_run(cached_dev_status_update,
dc, "bcache_status_update");
if (IS_ERR(dc->status_update_thread)) {
- pr_warn("failed to create bcache_status_update kthread, "
- "continue to run without monitoring backing "
- "device status");
+ pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
}
return 0;
@@ -1036,7 +1037,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
} while (time_out > 0);
if (time_out == 0)
- pr_warn("give up waiting for dc->writeback_write_update to quit");
+ pr_warn("give up waiting for dc->writeback_write_update to quit\n");
cancel_delayed_work_sync(&dc->writeback_rate_update);
}
@@ -1077,7 +1078,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_unlock(&bch_register_lock);
- pr_info("Caching disabled for %s", dc->backing_dev_name);
+ pr_info("Caching disabled for %s\n", dc->backing_dev_name);
/* Drop ref we took in cached_dev_detach() */
closure_put(&dc->disk.cl);
@@ -1117,20 +1118,20 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return -ENOENT;
if (dc->disk.c) {
- pr_err("Can't attach %s: already attached",
+ pr_err("Can't attach %s: already attached\n",
dc->backing_dev_name);
return -EINVAL;
}
if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
- pr_err("Can't attach %s: shutting down",
+ pr_err("Can't attach %s: shutting down\n",
dc->backing_dev_name);
return -EINVAL;
}
if (dc->sb.block_size < c->sb.block_size) {
/* Will die */
- pr_err("Couldn't attach %s: block size less than set's block size",
+ pr_err("Couldn't attach %s: block size less than set's block size\n",
dc->backing_dev_name);
return -EINVAL;
}
@@ -1138,7 +1139,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Check whether already attached */
list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
- pr_err("Tried to attach %s but duplicate UUID already attached",
+ pr_err("Tried to attach %s but duplicate UUID already attached\n",
dc->backing_dev_name);
return -EINVAL;
@@ -1157,14 +1158,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (!u) {
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- pr_err("Couldn't find uuid for %s in set",
+ pr_err("Couldn't find uuid for %s in set\n",
dc->backing_dev_name);
return -ENOENT;
}
u = uuid_find_empty(c);
if (!u) {
- pr_err("Not caching %s, no room for UUID",
+ pr_err("Not caching %s, no room for UUID\n",
dc->backing_dev_name);
return -EINVAL;
}
@@ -1210,7 +1211,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
down_write(&dc->writeback_lock);
if (bch_cached_dev_writeback_start(dc)) {
up_write(&dc->writeback_lock);
- pr_err("Couldn't start writeback facilities for %s",
+ pr_err("Couldn't start writeback facilities for %s\n",
dc->disk.disk->disk_name);
return -ENOMEM;
}
@@ -1233,7 +1234,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
*/
kthread_stop(dc->writeback_thread);
cancel_writeback_rate_update_dwork(dc);
- pr_err("Couldn't run cached device %s",
+ pr_err("Couldn't run cached device %s\n",
dc->backing_dev_name);
return ret;
}
@@ -1244,7 +1245,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
- pr_info("Caching %s as %s on set %pU",
+ pr_info("Caching %s as %s on set %pU\n",
dc->backing_dev_name,
dc->disk.disk->disk_name,
dc->disk.c->sb.set_uuid);
@@ -1384,7 +1385,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
- pr_info("registered backing device %s", dc->backing_dev_name);
+ pr_info("registered backing device %s\n", dc->backing_dev_name);
list_add(&dc->list, &uncached_devices);
/* attach to a matched cache set if it exists */
@@ -1401,7 +1402,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
return 0;
err:
- pr_notice("error %s: %s", dc->backing_dev_name, err);
+ pr_notice("error %s: %s\n", dc->backing_dev_name, err);
bcache_device_stop(&dc->disk);
return ret;
}
@@ -1497,7 +1498,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
u = uuid_find_empty(c);
if (!u) {
- pr_err("Can't create volume, no room for UUID");
+ pr_err("Can't create volume, no room for UUID\n");
return -EINVAL;
}
@@ -1523,7 +1524,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
smp_mb();
pr_err("stop %s: too many IO errors on backing device %s\n",
- dc->disk.disk->disk_name, dc->backing_dev_name);
+ dc->disk.disk->disk_name, dc->backing_dev_name);
bcache_device_stop(&dc->disk);
return true;
@@ -1534,6 +1535,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
if (c->on_error != ON_ERROR_PANIC &&
@@ -1541,20 +1543,22 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
return false;
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
- pr_info("CACHE_SET_IO_DISABLE already set");
+ pr_info("CACHE_SET_IO_DISABLE already set\n");
/*
* XXX: we can be called from atomic context
* acquire_console_sem();
*/
- pr_err("bcache: error on %pU: ", c->sb.set_uuid);
-
va_start(args, fmt);
- vprintk(fmt, args);
- va_end(args);
- pr_err(", disabling caching\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("error on %pU: %pV, disabling caching\n",
+ c->sb.set_uuid, &vaf);
+
+ va_end(args);
if (c->on_error == ON_ERROR_PANIC)
panic("panic forced after error\n");
@@ -1606,7 +1610,7 @@ static void cache_set_free(struct closure *cl)
list_del(&c->list);
mutex_unlock(&bch_register_lock);
- pr_info("Cache set %pU unregistered", c->sb.set_uuid);
+ pr_info("Cache set %pU unregistered\n", c->sb.set_uuid);
wake_up(&unregister_wait);
closure_debug_destroy(&c->cl);
@@ -1677,7 +1681,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
struct cached_dev *dc)
{
if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
- pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
d->disk->disk_name, c->sb.set_uuid);
bcache_device_stop(d);
} else if (atomic_read(&dc->has_dirty)) {
@@ -1685,7 +1689,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 1
*/
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
d->disk->disk_name);
/*
* There might be a small time gap that cache set is
@@ -1707,7 +1711,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 0
*/
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
d->disk->disk_name);
}
}
@@ -1874,7 +1878,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_read(c, &journal))
goto err;
- pr_debug("btree_journal_read() done");
+ pr_debug("btree_journal_read() done\n");
err = "no journal entries found";
if (list_empty(&journal))
@@ -1920,7 +1924,7 @@ static int run_cache_set(struct cache_set *c)
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
- pr_debug("btree_check() done");
+ pr_debug("btree_check() done\n");
/*
* bcache_journal_next() can't happen sooner, or
@@ -1951,7 +1955,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_replay(c, &journal))
goto err;
} else {
- pr_notice("invalidating existing data");
+ pr_notice("invalidating existing data\n");
for_each_cache(ca, c, i) {
unsigned int j;
@@ -2085,7 +2089,7 @@ found:
memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16);
c->sb.flags = ca->sb.flags;
c->sb.seq = ca->sb.seq;
- pr_debug("set version = %llu", c->sb.version);
+ pr_debug("set version = %llu\n", c->sb.version);
}
kobject_get(&ca->kobj);
@@ -2247,7 +2251,7 @@ err_btree_alloc:
err_free:
module_put(THIS_MODULE);
if (err)
- pr_notice("error %s: %s", ca->cache_dev_name, err);
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@@ -2301,14 +2305,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto out;
}
- pr_info("registered cache device %s", ca->cache_dev_name);
+ pr_info("registered cache device %s\n", ca->cache_dev_name);
out:
kobject_put(&ca->kobj);
err:
if (err)
- pr_notice("error %s: %s", ca->cache_dev_name, err);
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@@ -2323,6 +2327,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
+kobj_attribute_write(register_async, register_bcache);
kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
static bool bch_is_open_backing(struct block_device *bdev)
@@ -2358,6 +2363,83 @@ static bool bch_is_open(struct block_device *bdev)
return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}
+struct async_reg_args {
+ struct work_struct reg_work;
+ char *path;
+ struct cache_sb *sb;
+ struct cache_sb_disk *sb_disk;
+ struct block_device *bdev;
+};
+
+static void register_bdev_worker(struct work_struct *work)
+{
+ int fail = false;
+ struct async_reg_args *args =
+ container_of(work, struct async_reg_args, reg_work);
+ struct cached_dev *dc;
+
+ dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+ if (!dc) {
+ fail = true;
+ put_page(virt_to_page(args->sb_disk));
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ goto out;
+ }
+
+ mutex_lock(&bch_register_lock);
+ if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
+ fail = true;
+ mutex_unlock(&bch_register_lock);
+
+out:
+ if (fail)
+ pr_info("error %s: fail to register backing device\n",
+ args->path);
+ kfree(args->sb);
+ kfree(args->path);
+ kfree(args);
+ module_put(THIS_MODULE);
+}
+
+static void register_cache_worker(struct work_struct *work)
+{
+ int fail = false;
+ struct async_reg_args *args =
+ container_of(work, struct async_reg_args, reg_work);
+ struct cache *ca;
+
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca) {
+ fail = true;
+ put_page(virt_to_page(args->sb_disk));
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ goto out;
+ }
+
+ /* blkdev_put() will be called in bch_cache_release() */
+ if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
+ fail = true;
+
+out:
+ if (fail)
+ pr_info("error %s: fail to register cache device\n",
+ args->path);
+ kfree(args->sb);
+ kfree(args->path);
+ kfree(args);
+ module_put(THIS_MODULE);
+}
+
+static void register_device_aync(struct async_reg_args *args)
+{
+ if (SB_IS_BDEV(args->sb))
+ INIT_WORK(&args->reg_work, register_bdev_worker);
+ else
+ INIT_WORK(&args->reg_work, register_cache_worker);
+
+ queue_work(system_wq, &args->reg_work);
+}
+
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
@@ -2420,6 +2502,26 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
goto out_blkdev_put;
err = "failed to register device";
+ if (attr == &ksysfs_register_async) {
+ /* register in asynchronous way */
+ struct async_reg_args *args =
+ kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
+
+ if (!args) {
+ ret = -ENOMEM;
+ err = "cannot allocate memory";
+ goto out_put_sb_page;
+ }
+
+ args->path = path;
+ args->sb = sb;
+ args->sb_disk = sb_disk;
+ args->bdev = bdev;
+ register_device_aync(args);
+ /* No wait and returns to user space */
+ goto async_done;
+ }
+
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
@@ -2447,6 +2549,7 @@ done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
+async_done:
return size;
out_put_sb_page:
@@ -2461,7 +2564,7 @@ out_free_path:
out_module_put:
module_put(THIS_MODULE);
out:
- pr_info("error %s: %s", path?path:"", err);
+ pr_info("error %s: %s\n", path?path:"", err);
return ret;
}
@@ -2506,7 +2609,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
mutex_unlock(&bch_register_lock);
list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
- pr_info("delete pdev %p", pdev);
+ pr_info("delete pdev %p\n", pdev);
list_del(&pdev->list);
bcache_device_stop(&pdev->dc->disk);
kfree(pdev);
@@ -2549,7 +2652,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
mutex_unlock(&bch_register_lock);
- pr_info("Stopping all devices:");
+ pr_info("Stopping all devices:\n");
/*
* The reason bch_register_lock is not held to call
@@ -2599,9 +2702,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
finish_wait(&unregister_wait, &wait);
if (stopped)
- pr_info("All devices stopped");
+ pr_info("All devices stopped\n");
else
- pr_notice("Timeout waiting for devices to be closed");
+ pr_notice("Timeout waiting for devices to be closed\n");
out:
mutex_unlock(&bch_register_lock);
}
@@ -2637,7 +2740,7 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback_sync == 0)
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
- pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u",
+ pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
}
@@ -2645,13 +2748,13 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback == 0)
bch_cutoff_writeback = CUTOFF_WRITEBACK;
else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
- pr_warn("set bch_cutoff_writeback (%u) to max value %u",
+ pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
}
if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
- pr_warn("set bch_cutoff_writeback (%u) to %u",
+ pr_warn("set bch_cutoff_writeback (%u) to %u\n",
bch_cutoff_writeback, bch_cutoff_writeback_sync);
bch_cutoff_writeback = bch_cutoff_writeback_sync;
}
@@ -2662,6 +2765,9 @@ static int __init bcache_init(void)
static const struct attribute *files[] = {
&ksysfs_register.attr,
&ksysfs_register_quiet.attr,
+#ifdef CONFIG_BCACHE_ASYNC_REGISTRAION
+ &ksysfs_register_async.attr,
+#endif
&ksysfs_pendings_cleanup.attr,
NULL
};
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 323276994aab..0dadec5a78f6 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -421,7 +421,7 @@ STORE(__cached_dev)
return size;
}
if (v == -ENOENT)
- pr_err("Can't attach %s: cache set not found", buf);
+ pr_err("Can't attach %s: cache set not found\n", buf);
return v;
}
@@ -455,7 +455,7 @@ STORE(bch_cached_dev)
*/
if (dc->writeback_running) {
dc->writeback_running = false;
- pr_err("%s: failed to run non-existent writeback thread",
+ pr_err("%s: failed to run non-existent writeback thread\n",
dc->disk.disk->disk_name);
}
} else
@@ -872,11 +872,11 @@ STORE(__bch_cache_set)
if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already set");
+ pr_warn("CACHE_SET_IO_DISABLE already set\n");
} else {
if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
&c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already cleared");
+ pr_warn("CACHE_SET_IO_DISABLE already cleared\n");
}
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3f7641fb28d5..1cf1e5016cb9 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -809,7 +809,7 @@ static int bch_root_node_dirty_init(struct cache_set *c,
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
else if (ret < 0) {
- pr_warn("sectors dirty init failed, ret=%d!", ret);
+ pr_warn("sectors dirty init failed, ret=%d!\n", ret);
break;
}
} while (ret == -EAGAIN);
@@ -917,7 +917,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
if (!state) {
- pr_warn("sectors dirty init failed: cannot allocate memory");
+ pr_warn("sectors dirty init failed: cannot allocate memory\n");
return;
}
@@ -945,7 +945,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
&state->infos[i],
name);
if (IS_ERR(state->infos[i].thread)) {
- pr_err("fails to run thread bch_dirty_init[%d]", i);
+ pr_err("fails to run thread bch_dirty_init[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(state->infos[i].thread);
goto out;
diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h
index 8381d651d4ed..24e97db50ebb 100644
--- a/drivers/md/md-linear.h
+++ b/drivers/md/md-linear.h
@@ -12,6 +12,6 @@ struct linear_conf
struct rcu_head rcu;
sector_t array_sectors;
int raid_disks; /* a copy of mddev->raid_disks */
- struct dev_info disks[0];
+ struct dev_info disks[];
};
#endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 271e8a587354..f567f536b529 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -89,6 +89,7 @@ static struct module *md_cluster_mod;
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
+static struct workqueue_struct *md_rdev_misc_wq;
static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
@@ -227,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
goto abort;
if (mddev->serial_info_pool == NULL) {
- unsigned int noio_flag;
-
- noio_flag = memalloc_noio_save();
+ /*
+ * already in memalloc noio context by
+ * mddev_suspend()
+ */
mddev->serial_info_pool =
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct serial_info));
- memalloc_noio_restore(noio_flag);
if (!mddev->serial_info_pool) {
rdevs_uninit_serial(mddev);
pr_err("can't alloc memory pool for serialization\n");
@@ -466,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
const int sgrp = op_stat_group(bio_op(bio));
- struct mddev *mddev = q->queuedata;
+ struct mddev *mddev = bio->bi_disk->private_data;
unsigned int sectors;
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
@@ -527,11 +528,15 @@ void mddev_suspend(struct mddev *mddev)
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
del_timer_sync(&mddev->safemode_timer);
+ /* restrict memory reclaim I/O during raid array is suspend */
+ mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
+ /* entred the memalloc scope from mddev_suspend() */
+ memalloc_noio_restore(mddev->noio_flag);
lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
@@ -2454,7 +2459,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
return err;
}
-static void md_delayed_delete(struct work_struct *ws)
+static void rdev_delayed_delete(struct work_struct *ws)
{
struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
kobject_del(&rdev->kobj);
@@ -2479,9 +2484,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
* to delay it due to rcu usage.
*/
synchronize_rcu();
- INIT_WORK(&rdev->del_work, md_delayed_delete);
+ INIT_WORK(&rdev->del_work, rdev_delayed_delete);
kobject_get(&rdev->kobj);
- queue_work(md_misc_wq, &rdev->del_work);
+ queue_work(md_rdev_misc_wq, &rdev->del_work);
}
/*
@@ -3191,8 +3196,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
- err = rdev->mddev->pers->
- hot_add_disk(rdev->mddev, rdev);
+ err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
if (err) {
rdev->raid_disk = -1;
return err;
@@ -4514,6 +4518,20 @@ null_show(struct mddev *mddev, char *page)
return -EINVAL;
}
+/* need to ensure rdev_delayed_delete() has completed */
+static void flush_rdev_wq(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (work_pending(&rdev->del_work)) {
+ flush_workqueue(md_rdev_misc_wq);
+ break;
+ }
+ rcu_read_unlock();
+}
+
static ssize_t
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
@@ -4541,8 +4559,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
minor != MINOR(dev))
return -EOVERFLOW;
- flush_workqueue(md_misc_wq);
-
+ flush_rdev_wq(mddev);
err = mddev_lock(mddev);
if (err)
return err;
@@ -4780,7 +4797,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
mddev_lock(mddev) == 0) {
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -5626,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name)
mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
- mddev->queue->queuedata = mddev;
blk_set_stacking_limits(&mddev->queue->limits);
@@ -6147,7 +6164,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -6200,7 +6218,8 @@ static void __md_stop(struct mddev *mddev)
md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
- flush_workqueue(md_misc_wq);
+ if (mddev->event_work.func)
+ flush_workqueue(md_misc_wq);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@@ -7495,9 +7514,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
- if (cmd == ADD_NEW_DISK)
- /* need to ensure md_delayed_delete() has completed */
- flush_workqueue(md_misc_wq);
+ if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
+ flush_rdev_wq(mddev);
if (cmd == HOT_REMOVE_DISK)
/* need to ensure recovery thread has run */
@@ -7752,7 +7770,8 @@ static int md_open(struct block_device *bdev, fmode_t mode)
*/
mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
/* Then retry the open from the top */
return -ERESTARTSYS;
}
@@ -9040,8 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev->recovery_offset = 0;
}
- if (mddev->pers->
- hot_add_disk(mddev, rdev) == 0) {
+ if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
if (!test_bit(Journal, &rdev->flags))
@@ -9469,6 +9487,10 @@ static int __init md_init(void)
if (!md_misc_wq)
goto err_misc_wq;
+ md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
+ if (!md_misc_wq)
+ goto err_rdev_misc_wq;
+
if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
goto err_md;
@@ -9490,6 +9512,8 @@ static int __init md_init(void)
err_mdp:
unregister_blkdev(MD_MAJOR, "md");
err_md:
+ destroy_workqueue(md_rdev_misc_wq);
+err_rdev_misc_wq:
destroy_workqueue(md_misc_wq);
err_misc_wq:
destroy_workqueue(md_wq);
@@ -9776,6 +9800,7 @@ static __exit void md_exit(void)
* destroy_workqueue() below will wait for that to complete.
*/
}
+ destroy_workqueue(md_rdev_misc_wq);
destroy_workqueue(md_misc_wq);
destroy_workqueue(md_wq);
}
@@ -9785,7 +9810,7 @@ module_exit(md_exit)
static int get_ro(char *buffer, const struct kernel_param *kp)
{
- return sprintf(buffer, "%d", start_readonly);
+ return sprintf(buffer, "%d\n", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
diff --git a/drivers/md/md.h b/drivers/md/md.h
index acd681939112..612814d07d35 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -497,6 +497,7 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
+ unsigned int noio_flag; /* for memalloc scope API */
bool has_superblocks:1;
bool fail_last_dev:1;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cd810e195086..dcd27f3da84e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio)
static void call_bio_endio(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
- struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- /*
- * Wake up any possible resync thread that waits for the device
- * to go idle.
- */
- allow_barrier(conf, r1_bio->sector);
}
static void raid_end_bio_io(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
+ struct r1conf *conf = r1_bio->mddev->private;
/* if nobody has done the final endio yet, do it now */
if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
call_bio_endio(r1_bio);
}
+ /*
+ * Wake up any possible resync thread that waits for the device
+ * to go idle. All I/Os, even write-behind writes, are done.
+ */
+ allow_barrier(conf, r1_bio->sector);
+
free_r1bio(r1_bio);
}
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index e7ccad898736..b7eb09e8c025 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -180,7 +180,7 @@ struct r1bio {
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
*/
- struct bio *bios[0];
+ struct bio *bios[];
/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
};
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index d3eaaf3eb1bc..79cd2b7d3128 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -153,7 +153,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
- } devs[0];
+ } devs[];
};
/* bits for r10bio.state */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ba00e9877f02..ab8067f9ce8c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num)
}
/**
- * scribble_len - return the required size of the scribble region
+ * scribble_alloc - allocate percpu scribble buffer for required size
+ * of the scribble region
+ * @percpu - from for_each_present_cpu() of the caller
* @num - total number of disks in the array
+ * @cnt - scribble objs count for required size of the scribble region
*
- * The size must be enough to contain:
+ * The scribble buffer size must be enough to contain:
* 1/ a struct page pointer for each device in the array +2
* 2/ room to convert each entry in (1) to its corresponding dma
* (dma_map_page()) or page (page_address()) address.
@@ -2228,14 +2231,19 @@ static int grow_stripes(struct r5conf *conf, int num)
* of the P and Q blocks.
*/
static int scribble_alloc(struct raid5_percpu *percpu,
- int num, int cnt, gfp_t flags)
+ int num, int cnt)
{
size_t obj_size =
sizeof(struct page *) * (num+2) +
sizeof(addr_conv_t) * (num+2);
void *scribble;
- scribble = kvmalloc_array(cnt, obj_size, flags);
+ /*
+ * If here is in raid array suspend context, it is in memalloc noio
+ * context as well, there is no potential recursive memory reclaim
+ * I/Os with the GFP_KERNEL flag.
+ */
+ scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL);
if (!scribble)
return -ENOMEM;
@@ -2267,8 +2275,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
percpu = per_cpu_ptr(conf->percpu, cpu);
err = scribble_alloc(percpu, new_disks,
- new_sectors / STRIPE_SECTORS,
- GFP_NOIO);
+ new_sectors / STRIPE_SECTORS);
if (err)
break;
}
@@ -6759,8 +6766,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
conf->previous_raid_disks),
max(conf->chunk_sectors,
conf->prev_chunk_sectors)
- / STRIPE_SECTORS,
- GFP_KERNEL)) {
+ / STRIPE_SECTORS)) {
free_scratch_buffer(conf, percpu);
return -ENOMEM;
}