aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/md/md.c34
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/raid10.c74
3 files changed, 112 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 859edbf8c9b0..f1b905a20133 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -68,6 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
+ * Default number of read corrections we'll attempt on an rdev
+ * before ejecting it from the array. We divide the read error
+ * count by 2 for every hour elapsed between read errors.
+ */
+#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
+/*
* Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
* is 1000 KB/sec, so the extra system load does not show up that much.
* Increase it if you want to have more _guaranteed_ speed. Note that
@@ -2653,6 +2659,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
rdev->flags = 0;
rdev->data_offset = 0;
rdev->sb_events = 0;
+ rdev->last_read_error.tv_sec = 0;
+ rdev->last_read_error.tv_nsec = 0;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
atomic_set(&rdev->corrected_errors, 0);
@@ -3290,6 +3298,29 @@ static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
static ssize_t
+max_corrected_read_errors_show(mddev_t *mddev, char *page) {
+ return sprintf(page, "%d\n",
+ atomic_read(&mddev->max_corr_read_errors));
+}
+
+static ssize_t
+max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+
+ if (*buf && (*e == 0 || *e == '\n')) {
+ atomic_set(&mddev->max_corr_read_errors, n);
+ return len;
+ }
+ return -EINVAL;
+}
+
+static struct md_sysfs_entry max_corr_read_errors =
+__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
+ max_corrected_read_errors_store);
+
+static ssize_t
null_show(mddev_t *mddev, char *page)
{
return -EINVAL;
@@ -3914,6 +3945,7 @@ static struct attribute *md_default_attrs[] = {
&md_array_state.attr,
&md_reshape_position.attr,
&md_array_size.attr,
+ &max_corr_read_errors.attr,
NULL,
};
@@ -4333,6 +4365,8 @@ static int do_md_run(mddev_t * mddev)
mddev->ro = 0;
atomic_set(&mddev->writes_pending,0);
+ atomic_set(&mddev->max_corr_read_errors,
+ MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
mddev->safemode = 0;
mddev->safemode_timer.function = md_safemode_timeout;
mddev->safemode_timer.data = (unsigned long) mddev;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d9138885b87f..8e4c75c00d46 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -97,6 +97,9 @@ struct mdk_rdev_s
atomic_t read_errors; /* number of consecutive read errors that
* we have tried to ignore.
*/
+ struct timespec last_read_error; /* monotonic time since our
+ * last read error
+ */
atomic_t corrected_errors; /* number of corrected read errors,
* for reporting to userspace and storing
* in superblock.
@@ -299,6 +302,7 @@ struct mddev_s
int external;
} bitmap_info;
+ atomic_t max_corr_read_errors; /* max read retries */
struct list_head all_mddevs;
/* Generic barrier handling.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 670449f7411f..5c71a462c120 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
/*
+ * Used by fix_read_error() to decay the per rdev read_errors.
+ * We halve the read error count for every hour that has elapsed
+ * since the last recorded read error.
+ *
+ */
+static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+ struct timespec cur_time_mon;
+ unsigned long hours_since_last;
+ unsigned int read_errors = atomic_read(&rdev->read_errors);
+
+ ktime_get_ts(&cur_time_mon);
+
+ if (rdev->last_read_error.tv_sec == 0 &&
+ rdev->last_read_error.tv_nsec == 0) {
+ /* first time we've seen a read error */
+ rdev->last_read_error = cur_time_mon;
+ return;
+ }
+
+ hours_since_last = (cur_time_mon.tv_sec -
+ rdev->last_read_error.tv_sec) / 3600;
+
+ rdev->last_read_error = cur_time_mon;
+
+ /*
+ * if hours_since_last is > the number of bits in read_errors
+ * just set read errors to 0. We do this to avoid
+ * overflowing the shift of read_errors by hours_since_last.
+ */
+ if (hours_since_last >= 8 * sizeof(read_errors))
+ atomic_set(&rdev->read_errors, 0);
+ else
+ atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
+}
+
+/*
* This is a kernel thread which:
*
* 1. Retries failed read operations on working mirrors.
@@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors;
mdk_rdev_t*rdev;
+ int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
+
+ rcu_read_lock();
+ {
+ int d = r10_bio->devs[r10_bio->read_slot].devnum;
+ char b[BDEVNAME_SIZE];
+ int cur_read_error_count = 0;
+
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
+ bdevname(rdev->bdev, b);
+
+ if (test_bit(Faulty, &rdev->flags)) {
+ rcu_read_unlock();
+ /* drive has already been failed, just ignore any
+ more fix_read_error() attempts */
+ return;
+ }
+
+ check_decay_read_errors(mddev, rdev);
+ atomic_inc(&rdev->read_errors);
+ cur_read_error_count = atomic_read(&rdev->read_errors);
+ if (cur_read_error_count > max_read_errors) {
+ rcu_read_unlock();
+ printk(KERN_NOTICE
+ "raid10: %s: Raid device exceeded "
+ "read_error threshold "
+ "[cur %d:max %d]\n",
+ b, cur_read_error_count, max_read_errors);
+ printk(KERN_NOTICE
+ "raid10: %s: Failing raid "
+ "device\n", b);
+ md_error(mddev, conf->mirrors[d].rdev);
+ return;
+ }
+ }
+ rcu_read_unlock();
+
while(sectors) {
int s = sectors;
int sl = r10_bio->read_slot;