md: allow last device to be forcibly removed from RAID1/RAID10.

When the 'last' device in a RAID1 or RAID10 reports an error, we do not mark it as failed. This would serve little purpose as there is no risk of losing data beyond that which is obviously lost (as there is with RAID5), and there could be other sectors on the device which are readable, and only readable from this device. This in general this maximises access to data. However the current implementation also stops an admin from removing the last device by direct action. This is rarely useful, but in many case is not harmful and can make automation easier by removing special cases. Also, if an attempt to write metadata fails the device must be marked as faulty, else an infinite loop will result, attempting to update the metadata on all non-faulty devices. So add 'fail_last_dev' member to 'struct mddev', then we can bypasses the 'last disk' checks for RAID1 and RAID10, and control the behavior per array by change sysfs node. Signed-off-by: NeilBrown <neilb@suse.de> [add sysfs node for fail_last_dev by Guoqing] Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com> Signed-off-by: Song Liu <songliubraving@fb.com>
author: Guoqing Jiang 2019-07-24 11:09:19 +0200
committer: Song Liu 2019-08-07 10:25:02 -0700
commit: 9a567843f7ce0037bfd4d5fdc58a09d0a527b28b (patch)
tree: aa29a87219000763dc42b289f2141fec5bc053f6 /drivers/md
parent: cf89160793c439dca00e2563d0b7f153c274027b (diff)
4 files changed, 36 insertions, 6 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3f1252440ad0..67b90d547f8b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5178,6 +5178,34 @@ static struct md_sysfs_entry md_consistency_policy =
 __ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
        consistency_policy_store);
 
+static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
+{
+	return sprintf(page, "%d\n", mddev->fail_last_dev);
+}
+
+/*
+ * Setting fail_last_dev to true to allow last device to be forcibly removed
+ * from RAID1/RAID10.
+ */
+static ssize_t
+fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
+{
+	int ret;
+	bool value;
+
+	ret = kstrtobool(buf, &value);
+	if (ret)
+		return ret;
+
+	if (value != mddev->fail_last_dev)
+		mddev->fail_last_dev = value;
+
+	return len;
+}
+static struct md_sysfs_entry md_fail_last_dev =
+__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
+       fail_last_dev_store);
+
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
 	&md_layout.attr,
@@ -5194,6 +5222,7 @@ static struct attribute *md_default_attrs[] = {
 	&md_array_size.attr,
 	&max_corr_read_errors.attr,
 	&md_consistency_policy.attr,
+	&md_fail_last_dev.attr,
 	NULL,
 };
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 10f98200e2f8..b742659150a2 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -487,6 +487,7 @@ struct mddev {
 	unsigned int			good_device_nr;	/* good device num within cluster raid */
 
 	bool	has_superblocks:1;
+	bool	fail_last_dev:1;
 };
 
 enum recovery_flags {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7ffbd8112400..cd80f281b95d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1617,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks, ignore the error, let the
-	 * next level up know.
+	 * else if it is the last working disks with "fail_last_dev == false",
+	 * ignore the error, let the next level up know.
 	 * else mark the drive as failed
 	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags)
+	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
 	    && (conf->raid_disks - mddev->degraded) == 1) {
 		/*
 		 * Don't fail the drive, act as though we were just a
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a982e040b609..299c7b1c9718 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1640,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks, ignore the error, let the
-	 * next level up know.
+	 * else if it is the last working disks with "fail_last_dev == false",
+	 * ignore the error, let the next level up know.
 	 * else mark the drive as failed
 	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags)
+	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
 	    && !enough(conf, rdev->raid_disk)) {
 		/*
 		 * Don't fail the drive, just return an IO error.
author	Guoqing Jiang	2019-07-24 11:09:19 +0200
committer	Song Liu	2019-08-07 10:25:02 -0700
commit	9a567843f7ce0037bfd4d5fdc58a09d0a527b28b (patch)
tree	aa29a87219000763dc42b289f2141fec5bc053f6 /drivers/md
parent	cf89160793c439dca00e2563d0b7f153c274027b (diff)