aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/extent-io-tree.h3
-rw-r--r--fs/btrfs/extent_io.c163
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/misc.h35
-rw-r--r--include/trace/events/btrfs.h1
7 files changed, 96 insertions, 120 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b160b8e124e0..108af52ba870 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -94,7 +94,8 @@ struct btrfs_inode {
/* special utility tree used to record which mirrors have already been
* tried when checksums fail for a given block
*/
- struct extent_io_tree io_failure_tree;
+ struct rb_root io_failure_tree;
+ spinlock_t io_failure_lock;
/*
* Keep track of where the inode has extent items mapped in order to
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 5584968643eb..ee2ba4b6e4a1 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -56,7 +56,6 @@ enum {
IO_TREE_FS_EXCLUDED_EXTENTS,
IO_TREE_BTREE_INODE_IO,
IO_TREE_INODE_IO,
- IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
@@ -89,8 +88,6 @@ struct extent_state {
refcount_t refs;
u32 state;
- struct io_failure_record *failrec;
-
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 156ab8cc1acc..1ab14fa7f837 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -326,7 +326,6 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
if (!state)
return state;
state->state = 0;
- state->failrec = NULL;
RB_CLEAR_NODE(&state->rb_node);
btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
refcount_set(&state->refs, 1);
@@ -2159,64 +2158,29 @@ out:
return total_bytes;
}
-/*
- * set the private field for a given byte offset in the tree. If there isn't
- * an extent_state there already, this does nothing.
- */
-static int set_state_failrec(struct extent_io_tree *tree, u64 start,
- struct io_failure_record *failrec)
+static int insert_failrec(struct btrfs_inode *inode,
+ struct io_failure_record *failrec)
{
- struct rb_node *node;
- struct extent_state *state;
- int ret = 0;
+ struct rb_node *exist;
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node) {
- ret = -ENOENT;
- goto out;
- }
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start != start) {
- ret = -ENOENT;
- goto out;
- }
- state->failrec = failrec;
-out:
- spin_unlock(&tree->lock);
- return ret;
+ spin_lock(&inode->io_failure_lock);
+ exist = rb_simple_insert(&inode->io_failure_tree, failrec->bytenr,
+ &failrec->rb_node);
+ spin_unlock(&inode->io_failure_lock);
+
+ return (exist == NULL) ? 0 : -EEXIST;
}
-static struct io_failure_record *get_state_failrec(struct extent_io_tree *tree,
- u64 start)
+static struct io_failure_record *get_failrec(struct btrfs_inode *inode, u64 start)
{
struct rb_node *node;
- struct extent_state *state;
- struct io_failure_record *failrec;
+ struct io_failure_record *failrec = ERR_PTR(-ENOENT);
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node) {
- failrec = ERR_PTR(-ENOENT);
- goto out;
- }
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start != start) {
- failrec = ERR_PTR(-ENOENT);
- goto out;
- }
-
- failrec = state->failrec;
-out:
- spin_unlock(&tree->lock);
+ spin_lock(&inode->io_failure_lock);
+ node = rb_simple_search(&inode->io_failure_tree, start);
+ if (node)
+ failrec = rb_entry(node, struct io_failure_record, rb_node);
+ spin_unlock(&inode->io_failure_lock);
return failrec;
}
@@ -2276,28 +2240,20 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
return bitset;
}
-static int free_io_failure(struct extent_io_tree *failure_tree,
- struct extent_io_tree *io_tree,
+static int free_io_failure(struct btrfs_inode *inode,
struct io_failure_record *rec)
{
int ret;
- int err = 0;
- set_state_failrec(failure_tree, rec->start, NULL);
- ret = clear_extent_bits(failure_tree, rec->start,
- rec->start + rec->len - 1,
- EXTENT_LOCKED | EXTENT_DIRTY);
- if (ret)
- err = ret;
+ spin_lock(&inode->io_failure_lock);
+ rb_erase(&rec->rb_node, &inode->io_failure_tree);
+ spin_unlock(&inode->io_failure_lock);
- ret = clear_extent_bits(io_tree, rec->start,
- rec->start + rec->len - 1,
+ ret = clear_extent_bits(&inode->io_tree, rec->bytenr,
+ rec->bytenr + rec->len - 1,
EXTENT_DAMAGED);
- if (ret && !err)
- err = ret;
-
kfree(rec);
- return err;
+ return ret;
}
/*
@@ -2436,22 +2392,13 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
struct page *page, unsigned int pg_offset)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct extent_io_tree *failure_tree = &inode->io_failure_tree;
struct extent_io_tree *io_tree = &inode->io_tree;
u64 ino = btrfs_ino(inode);
- u64 private;
struct io_failure_record *failrec;
struct extent_state *state;
int mirror;
- int ret;
-
- private = 0;
- ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
- EXTENT_DIRTY, 0);
- if (!ret)
- return 0;
- failrec = get_state_failrec(failure_tree, start);
+ failrec = get_failrec(inode, start);
if (IS_ERR(failrec))
return 0;
@@ -2462,12 +2409,12 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
spin_lock(&io_tree->lock);
state = find_first_extent_bit_state(io_tree,
- failrec->start,
+ failrec->bytenr,
EXTENT_LOCKED);
spin_unlock(&io_tree->lock);
- if (!state || state->start > failrec->start ||
- state->end < failrec->start + failrec->len - 1)
+ if (!state || state->start > failrec->bytenr ||
+ state->end < failrec->bytenr + failrec->len - 1)
goto out;
mirror = failrec->this_mirror;
@@ -2478,7 +2425,7 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
} while (mirror != failrec->failed_mirror);
out:
- free_io_failure(failure_tree, io_tree, failrec);
+ free_io_failure(inode, failrec);
return 0;
}
@@ -2490,30 +2437,26 @@ out:
*/
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
{
- struct extent_io_tree *failure_tree = &inode->io_failure_tree;
struct io_failure_record *failrec;
- struct extent_state *state, *next;
+ struct rb_node *node, *next;
- if (RB_EMPTY_ROOT(&failure_tree->state))
+ if (RB_EMPTY_ROOT(&inode->io_failure_tree))
return;
- spin_lock(&failure_tree->lock);
- state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
- while (state) {
- if (state->start > end)
+ spin_lock(&inode->io_failure_lock);
+ node = rb_simple_search_first(&inode->io_failure_tree, start);
+ while (node) {
+ failrec = rb_entry(node, struct io_failure_record, rb_node);
+ if (failrec->bytenr > end)
break;
- ASSERT(state->end <= end);
-
- next = next_state(state);
-
- failrec = state->failrec;
- free_extent_state(state);
+ next = rb_next(node);
+ rb_erase(&failrec->rb_node, &inode->io_failure_tree);
kfree(failrec);
- state = next;
+ node = next;
}
- spin_unlock(&failure_tree->lock);
+ spin_unlock(&inode->io_failure_lock);
}
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
@@ -2523,16 +2466,15 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 start = bbio->file_offset + bio_offset;
struct io_failure_record *failrec;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
const u32 sectorsize = fs_info->sectorsize;
int ret;
- failrec = get_state_failrec(failure_tree, start);
+ failrec = get_failrec(BTRFS_I(inode), start);
if (!IS_ERR(failrec)) {
btrfs_debug(fs_info,
"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
- failrec->logical, failrec->start, failrec->len);
+ failrec->logical, failrec->bytenr, failrec->len);
/*
* when data can be on disk more than twice, add to failrec here
* (e.g. with a list for failed_mirror) to make
@@ -2547,7 +2489,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
if (!failrec)
return ERR_PTR(-ENOMEM);
- failrec->start = start;
+ RB_CLEAR_NODE(&failrec->rb_node);
+ failrec->bytenr = start;
failrec->len = sectorsize;
failrec->failed_mirror = bbio->mirror_num;
failrec->this_mirror = bbio->mirror_num;
@@ -2572,17 +2515,17 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
}
/* Set the bits in the private failure tree */
- ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
- EXTENT_LOCKED | EXTENT_DIRTY);
- if (ret >= 0) {
- ret = set_state_failrec(failure_tree, start, failrec);
- /* Set the bits in the inode's tree */
- ret = set_extent_bits(tree, start, start + sectorsize - 1,
- EXTENT_DAMAGED);
- } else if (ret < 0) {
+ ret = insert_failrec(BTRFS_I(inode), failrec);
+ if (ret) {
kfree(failrec);
return ERR_PTR(ret);
}
+ ret = set_extent_bits(tree, start, start + sectorsize - 1,
+ EXTENT_DAMAGED);
+ if (ret) {
+ free_io_failure(BTRFS_I(inode), failrec);
+ return ERR_PTR(ret);
+ }
return failrec;
}
@@ -2594,8 +2537,6 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
u64 start = failed_bbio->file_offset + bio_offset;
struct io_failure_record *failrec;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *failed_bio = &failed_bbio->bio;
const int icsum = bio_offset >> fs_info->sectorsize_bits;
struct bio *repair_bio;
@@ -2624,7 +2565,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
btrfs_debug(fs_info,
"failed to repair num_copies %d this_mirror %d failed_mirror %d",
failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
- free_io_failure(failure_tree, tree, failrec);
+ free_io_failure(BTRFS_I(inode), failrec);
return -EIO;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 69a86ae6fd50..1c4717669073 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -254,8 +254,12 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
* bio end_io callback is called to indicate things have failed.
*/
struct io_failure_record {
+ /* Use rb_simple_node for search/insert */
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ };
struct page *page;
- u64 start;
u64 len;
u64 logical;
int this_mirror;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 548665299e57..8def3a67adb7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8790,6 +8790,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
+ spin_lock_init(&ei->io_failure_lock);
ei->outstanding_extents = 0;
if (sb->s_magic != BTRFS_TEST_MAGIC)
btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
@@ -8806,12 +8807,10 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
- extent_io_tree_init(fs_info, &ei->io_failure_tree,
- IO_TREE_INODE_IO_FAILURE, inode);
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT, inode);
+ ei->io_failure_tree = RB_ROOT;
ei->io_tree.track_uptodate = true;
- ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 340f995652f2..f9850edfd726 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -88,6 +88,41 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
return NULL;
}
+/*
+ * Search @root for the first entry that starts at or after @bytenr.
+ *
+ * @root: the root to search.
+ * @bytenr: bytenr to search from.
+ *
+ * Each node visited is interpreted as a struct rb_simple_node and compared by
+ * its bytenr. The walk descends from the root: an exact match is returned
+ * immediately; otherwise the entry with the smallest bytenr greater than
+ * @bytenr seen on the way down is remembered and returned at the end.
+ *
+ * Return the rb_node that starts at or after @bytenr. If there is no entry at
+ * or after @bytenr return NULL.
+ */
+static inline struct rb_node *rb_simple_search_first(struct rb_root *root,
+ u64 bytenr)
+{
+ struct rb_node *node = root->rb_node, *ret = NULL;
+ struct rb_simple_node *entry, *ret_entry = NULL;
+
+ while (node) {
+ entry = rb_entry(node, struct rb_simple_node, rb_node);
+
+ if (bytenr < entry->bytenr) { /* entry lies after @bytenr: candidate result */
+ if (!ret || entry->bytenr < ret_entry->bytenr) { /* keep the smallest candidate so far */
+ ret = node;
+ ret_entry = entry;
+ }
+
+ node = node->rb_left; /* look for a closer entry on the left */
+ } else if (bytenr > entry->bytenr) { /* entry lies before @bytenr: not a candidate */
+ node = node->rb_right;
+ } else { /* exact match on bytenr */
+ return node;
+ }
+ }
+
+ return ret; /* NULL when no entry at or after @bytenr was seen */
+}
+
static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
struct rb_node *node)
{
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 73df80d462dc..4db905311d67 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -84,7 +84,6 @@ struct raid56_bio_trace_info;
EM( IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS") \
EM( IO_TREE_BTREE_INODE_IO, "BTREE_INODE_IO") \
EM( IO_TREE_INODE_IO, "INODE_IO") \
- EM( IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE") \
EM( IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS") \
EM( IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES") \
EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES") \