From 1a0b7cd82657a590f163b090bd9123a3a6b9aae4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 Jul 2011 11:39:25 +1000 Subject: md/raid10: record bad blocks due to write errors during resync/recovery. If we get a write error during resync/recovery don't fail the device but instead record a bad block. If that fails we can then fail the device. Signed-off-by: NeilBrown --- drivers/md/raid10.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 10415ddfcb42..e54ff3274eda 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1452,9 +1452,10 @@ static void end_sync_write(struct bio *bio, int error) d = find_bio_disk(conf, r10_bio, bio, &slot); - if (!uptodate) - md_error(mddev, conf->mirrors[d].rdev); - else if (is_badblock(conf->mirrors[d].rdev, + if (!uptodate) { + set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); + set_bit(R10BIO_WriteError, &r10_bio->state); + } else if (is_badblock(conf->mirrors[d].rdev, r10_bio->devs[slot].addr, r10_bio->sectors, &first_bad, &bad_sectors)) @@ -1465,7 +1466,8 @@ static void end_sync_write(struct bio *bio, int error) if (r10_bio->master_bio == NULL) { /* the primary of several recovery bios */ sector_t s = r10_bio->sectors; - if (test_bit(R10BIO_MadeGood, &r10_bio->state)) + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) reschedule_retry(r10_bio); else put_buf(r10_bio); @@ -1473,7 +1475,8 @@ static void end_sync_write(struct bio *bio, int error) break; } else { r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio; - if (test_bit(R10BIO_MadeGood, &r10_bio->state)) + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) reschedule_retry(r10_bio); else put_buf(r10_bio); @@ -2029,23 +2032,33 @@ static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio) /* Some sort of write request has finished and it * succeeded in writing where we thought there was a * bad block. So forget the bad block. + * Or possibly if failed and we need to record + * a bad block. */ int m; mdk_rdev_t *rdev; if (test_bit(R10BIO_IsSync, &r10_bio->state) || test_bit(R10BIO_IsRecover, &r10_bio->state)) { - for (m = 0; m < conf->copies; m++) - if (r10_bio->devs[m].bio && - test_bit(BIO_UPTODATE, + for (m = 0; m < conf->copies; m++) { + int dev = r10_bio->devs[m].devnum; + rdev = conf->mirrors[dev].rdev; + if (r10_bio->devs[m].bio == NULL) + continue; + if (test_bit(BIO_UPTODATE, &r10_bio->devs[m].bio->bi_flags)) { - int dev = r10_bio->devs[m].devnum; - rdev = conf->mirrors[dev].rdev; rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, r10_bio->sectors); + } else { + if (!rdev_set_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors, 0)) + md_error(conf->mddev, rdev); } + } put_buf(r10_bio); } else { for (m = 0; m < conf->copies; m++) { -- cgit v1.2.3