Merge branch '2020-09-14-btrfs-rewrite' into next

- Bring in the update to btrfs support that rewrites it based on btrfs-progs.
author: Tom Rini 2020-09-07 21:00:47 -0400
committer: Tom Rini 2020-09-07 21:00:47 -0400
commit: 2a9f9d633d2e069e5d5e7acde050b338ec803692 (patch)
tree: 40c97310da83790361c498e158ee3814e8e140a7
parent: 314b9b4a38befd120ce1c566d9eea8e0ec9d8336 (diff)
parent: b737c822c0c4f1210568f61e743236bb980ca645 (diff)
30 files changed, 8525 insertions, 2491 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 101f4e185df..332112f6fd0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -599,6 +599,8 @@ F:	tools/binman/
 
 BTRFS
 M:	Marek Behun <marek.behun@nic.cz>
+R:	Qu Wenruo <wqu@suse.com>
+L:	linux-btrfs@vger.kernel.org
 S:	Maintained
 F:	cmd/btrfs.c
 F:	fs/btrfs/
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b3159296ff..fc074c84d28 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -2,5 +2,6 @@
 #
 # 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
 
-obj-y := btrfs.o chunk-map.o compression.o ctree.o dev.o dir-item.o \
-	extent-io.o hash.o inode.o root.o subvolume.o super.o
+obj-y := btrfs.o compression.o ctree.o dev.o dir-item.o \
+	extent-io.o inode.o subvolume.o crypto/hash.o disk-io.o \
+	common/rbtree-utils.o extent-cache.o volumes.o root-tree.o
diff --git a/fs/btrfs/btrfs.c b/fs/btrfs/btrfs.c
index de16217d0dd..cbf9dcffeb2 100644
--- a/fs/btrfs/btrfs.c
+++ b/fs/btrfs/btrfs.c
@@ -5,219 +5,280 @@
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
 #include <config.h>
 #include <malloc.h>
 #include <uuid.h>
 #include <linux/time.h>
+#include "btrfs.h"
+#include "crypto/hash.h"
+#include "disk-io.h"
 
-struct btrfs_info btrfs_info;
+struct btrfs_fs_info *current_fs_info;
 
-static int readdir_callback(const struct btrfs_root *root,
-			    struct btrfs_dir_item *item)
+static int show_dir(struct btrfs_root *root, struct extent_buffer *eb,
+		    struct btrfs_dir_item *di)
 {
-	static const char typestr[BTRFS_FT_MAX][4] = {
-		[BTRFS_FT_UNKNOWN]  = " ? ",
-		[BTRFS_FT_REG_FILE] = "   ",
-		[BTRFS_FT_DIR]      = "DIR",
-		[BTRFS_FT_CHRDEV]   = "CHR",
-		[BTRFS_FT_BLKDEV]   = "BLK",
-		[BTRFS_FT_FIFO]     = "FIF",
-		[BTRFS_FT_SOCK]     = "SCK",
-		[BTRFS_FT_SYMLINK]  = "SYM",
-		[BTRFS_FT_XATTR]    = " ? ",
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_inode_item ii;
+	struct btrfs_key key;
+	static const char* dir_item_str[] = {
+		[BTRFS_FT_REG_FILE]	= "FILE",
+		[BTRFS_FT_DIR] 		= "DIR",
+		[BTRFS_FT_CHRDEV]	= "CHRDEV",
+		[BTRFS_FT_BLKDEV]	= "BLKDEV",
+		[BTRFS_FT_FIFO]		= "FIFO",
+		[BTRFS_FT_SOCK]		= "SOCK",
+		[BTRFS_FT_SYMLINK]	= "SYMLINK",
+		[BTRFS_FT_XATTR]	= "XATTR"
 	};
-	struct btrfs_inode_item inode;
-	const char *name = (const char *) (item + 1);
-	char filetime[32], *target = NULL;
+	u8 type = btrfs_dir_type(eb, di);
+	char namebuf[BTRFS_NAME_LEN];
+	char *target = NULL;
+	char filetime[32];
 	time_t mtime;
+	int ret;
 
-	if (btrfs_lookup_inode(root, &item->location, &inode, NULL)) {
-		printf("%s: Cannot find inode item for directory entry %.*s!\n",
-		       __func__, item->name_len, name);
-		return 0;
-	}
-
-	mtime = inode.mtime.sec;
-	ctime_r(&mtime, filetime);
+	btrfs_dir_item_key_to_cpu(eb, di, &key);
 
-	if (item->type == BTRFS_FT_SYMLINK) {
-		target = malloc(min(inode.size + 1,
-				    (u64) btrfs_info.sb.sectorsize));
+	if (key.type == BTRFS_ROOT_ITEM_KEY) {
+		struct btrfs_root *subvol;
 
-		if (target && btrfs_readlink(root, item->location.objectid,
-					     target)) {
-			free(target);
-			target = NULL;
+		/* It's a subvolume, get its mtime from root item */
+		subvol = btrfs_read_fs_root(fs_info, &key);
+		if (IS_ERR(subvol)) {
+			ret = PTR_ERR(subvol);
+			error("Can't find root %llu", key.objectid);
+			return ret;
 		}
+		mtime = btrfs_stack_timespec_sec(&subvol->root_item.otime);
+	} else {
+		struct btrfs_path path;
+
+		/* It's regular inode, get its mtime from inode item */
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+		if (ret > 0)
+			ret = -ENOENT;
+		if (ret < 0) {
+			error("Can't find inode %llu", key.objectid);
+			btrfs_release_path(&path);
+			return ret;
+		}
+		read_extent_buffer(path.nodes[0], &ii,
+			btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
+			sizeof(ii));
+		btrfs_release_path(&path);
+		mtime = btrfs_stack_timespec_sec(&ii.mtime);
+	}
+	ctime_r(&mtime, filetime);
 
-		if (!target)
-			printf("%s: Cannot read symlink target!\n", __func__);
+	if (type == BTRFS_FT_SYMLINK) {
+		target = malloc(fs_info->sectorsize);
+		if (!target) {
+			error("Can't alloc memory for symlink %llu",
+				key.objectid);
+			return -ENOMEM;
+		}
+		ret = btrfs_readlink(root, key.objectid, target);
+		if (ret < 0) {
+			error("Failed to read symlink %llu", key.objectid);
+			goto out;
+		}
+		target[ret] = '\0';
 	}
 
-	printf("<%s> ", typestr[item->type]);
-	if (item->type == BTRFS_FT_CHRDEV || item->type == BTRFS_FT_BLKDEV)
-		printf("%4u,%5u  ", (unsigned int) (inode.rdev >> 20),
-			(unsigned int) (inode.rdev & 0xfffff));
+	if (type < ARRAY_SIZE(dir_item_str) && dir_item_str[type])
+		printf("<%s> ", dir_item_str[type]);
 	else
-		printf("%10llu  ", inode.size);
-
-	printf("%24.24s  %.*s", filetime, item->name_len, name);
-
-	if (item->type == BTRFS_FT_SYMLINK) {
-		printf(" -> %s", target ? target : "?");
-		if (target)
-			free(target);
+		printf("DIR_ITEM.%u", type);
+	if (type == BTRFS_FT_CHRDEV || type == BTRFS_FT_BLKDEV) {
+		ASSERT(key.type == BTRFS_INODE_ITEM_KEY);
+		printf("%4llu,%5llu  ", btrfs_stack_inode_rdev(&ii) >> 20,
+				btrfs_stack_inode_rdev(&ii) & 0xfffff);
+	} else {
+		if (key.type == BTRFS_INODE_ITEM_KEY)
+			printf("%10llu  ", btrfs_stack_inode_size(&ii));
+		else
+			printf("%10llu  ", 0ULL);
 	}
 
+	read_extent_buffer(eb, namebuf, (unsigned long)(di + 1),
+			   btrfs_dir_name_len(eb, di));
+	printf("%24.24s  %.*s", filetime, btrfs_dir_name_len(eb, di), namebuf);
+	if (type == BTRFS_FT_SYMLINK)
+		printf(" -> %s", target ? target : "?");
 	printf("\n");
-
-	return 0;
+out:
+	free(target);
+	return ret;
 }
 
 int btrfs_probe(struct blk_desc *fs_dev_desc,
 		struct disk_partition *fs_partition)
 {
-	btrfs_blk_desc = fs_dev_desc;
-	btrfs_part_info = fs_partition;
-
-	memset(&btrfs_info, 0, sizeof(btrfs_info));
+	struct btrfs_fs_info *fs_info;
+	int ret = -1;
 
 	btrfs_hash_init();
-	if (btrfs_read_superblock())
-		return -1;
-
-	if (btrfs_chunk_map_init()) {
-		printf("%s: failed to init chunk map\n", __func__);
-		return -1;
+	fs_info = open_ctree_fs_info(fs_dev_desc, fs_partition);
+	if (fs_info) {
+		current_fs_info = fs_info;
+		ret = 0;
 	}
-
-	btrfs_info.tree_root.objectid = 0;
-	btrfs_info.tree_root.bytenr = btrfs_info.sb.root;
-	btrfs_info.chunk_root.objectid = 0;
-	btrfs_info.chunk_root.bytenr = btrfs_info.sb.chunk_root;
-
-	if (btrfs_read_chunk_tree()) {
-		printf("%s: failed to read chunk tree\n", __func__);
-		return -1;
-	}
-
-	if (btrfs_find_root(btrfs_get_default_subvol_objectid(),
-			    &btrfs_info.fs_root, NULL)) {
-		printf("%s: failed to find default subvolume\n", __func__);
-		return -1;
-	}
-
-	return 0;
+	return ret;
 }
 
 int btrfs_ls(const char *path)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	u64 inr;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_root *root = fs_info->fs_root;
+	u64 ino = BTRFS_FIRST_FREE_OBJECTID;
 	u8 type;
+	int ret;
 
-	inr = btrfs_lookup_path(&root, root.root_dirid, path, &type, NULL, 40);
-
-	if (inr == -1ULL) {
+	ASSERT(fs_info);
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				path, &root, &ino, &type, 40);
+	if (ret < 0) {
 		printf("Cannot lookup path %s\n", path);
-		return -1;
+		return ret;
 	}
 
 	if (type != BTRFS_FT_DIR) {
-		printf("Not a directory: %s\n", path);
-		return -1;
+		error("Not a directory: %s", path);
+		return -ENOENT;
 	}
-
-	if (btrfs_readdir(&root, inr, readdir_callback)) {
-		printf("An error occured while listing directory %s\n", path);
-		return -1;
+	ret = btrfs_iter_dir(root, ino, show_dir);
+	if (ret < 0) {
+		error("An error occured while listing directory %s", path);
+		return ret;
 	}
-
 	return 0;
 }
 
 int btrfs_exists(const char *file)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	u64 inr;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_root *root;
+	u64 ino;
 	u8 type;
+	int ret;
 
-	inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, NULL, 40);
+	ASSERT(fs_info);
 
-	return (inr != -1ULL && type == BTRFS_FT_REG_FILE);
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				file, &root, &ino, &type, 40);
+	if (ret < 0)
+		return 0;
+
+	if (type == BTRFS_FT_REG_FILE)
+		return 1;
+	return 0;
 }
 
 int btrfs_size(const char *file, loff_t *size)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	struct btrfs_inode_item inode;
-	u64 inr;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_inode_item *ii;
+	struct btrfs_root *root;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	u64 ino;
 	u8 type;
+	int ret;
 
-	inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
-				40);
-
-	if (inr == -1ULL) {
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				file, &root, &ino, &type, 40);
+	if (ret < 0) {
 		printf("Cannot lookup file %s\n", file);
-		return -1;
+		return ret;
 	}
-
 	if (type != BTRFS_FT_REG_FILE) {
 		printf("Not a regular file: %s\n", file);
-		return -1;
+		return -ENOENT;
 	}
-
-	*size = inode.size;
-	return 0;
+	btrfs_init_path(&path);
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0) {
+		printf("Cannot lookup ino %llu\n", ino);
+		return ret;
+	}
+	if (ret > 0) {
+		printf("Ino %llu does not exist\n", ino);
+		ret = -ENOENT;
+		goto out;
+	}
+	ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
+			    struct btrfs_inode_item);
+	*size = btrfs_inode_size(path.nodes[0], ii);
+out:
+	btrfs_release_path(&path);
+	return ret;
 }
 
 int btrfs_read(const char *file, void *buf, loff_t offset, loff_t len,
 	       loff_t *actread)
 {
-	struct btrfs_root root = btrfs_info.fs_root;
-	struct btrfs_inode_item inode;
-	u64 inr, rd;
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	struct btrfs_root *root;
+	loff_t real_size = 0;
+	u64 ino;
 	u8 type;
-
-	inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
-				40);
-
-	if (inr == -1ULL) {
-		printf("Cannot lookup file %s\n", file);
-		return -1;
+	int ret;
+
+	ASSERT(fs_info);
+	ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+				file, &root, &ino, &type, 40);
+	if (ret < 0) {
+		error("Cannot lookup file %s", file);
+		return ret;
 	}
 
 	if (type != BTRFS_FT_REG_FILE) {
-		printf("Not a regular file: %s\n", file);
-		return -1;
+		error("Not a regular file: %s", file);
+		return -EINVAL;
 	}
 
-	if (!len)
-		len = inode.size;
+	if (!len) {
+		ret = btrfs_size(file, &real_size);
+		if (ret < 0) {
+			error("Failed to get inode size: %s", file);
+			return ret;
+		}
+		len = real_size;
+	}
 
-	if (len > inode.size - offset)
-		len = inode.size - offset;
+	if (len > real_size - offset)
+		len = real_size - offset;
 
-	rd = btrfs_file_read(&root, inr, offset, len, buf);
-	if (rd == -1ULL) {
-		printf("An error occured while reading file %s\n", file);
-		return -1;
+	ret = btrfs_file_read(root, ino, offset, len, buf);
+	if (ret < 0) {
+		error("An error occured while reading file %s", file);
+		return ret;
 	}
 
-	*actread = rd;
+	*actread = len;
 	return 0;
 }
 
 void btrfs_close(void)
 {
-	btrfs_chunk_map_exit();
+	if (current_fs_info) {
+		close_ctree_fs_info(current_fs_info);
+		current_fs_info = NULL;
+	}
 }
 
 int btrfs_uuid(char *uuid_str)
 {
 #ifdef CONFIG_LIB_UUID
-	uuid_bin_to_str(btrfs_info.sb.fsid, uuid_str, UUID_STR_FORMAT_STD);
+	if (current_fs_info)
+		uuid_bin_to_str(current_fs_info->super_copy->fsid, uuid_str,
+				UUID_STR_FORMAT_STD);
 	return 0;
 #endif
 	return -ENOSYS;
diff --git a/fs/btrfs/btrfs.h b/fs/btrfs/btrfs.h
index 25a8cf6a879..7d8b395b264 100644
--- a/fs/btrfs/btrfs.h
+++ b/fs/btrfs/btrfs.h
@@ -11,77 +11,18 @@
 #include <linux/rbtree.h>
 #include "conv-funcs.h"
 
-struct btrfs_info {
-	struct btrfs_super_block sb;
-
-	struct btrfs_root tree_root;
-	struct btrfs_root fs_root;
-	struct btrfs_root chunk_root;
-
-	struct rb_root chunks_root;
-};
-
 extern struct btrfs_info btrfs_info;
-
-/* hash.c */
-void btrfs_hash_init(void);
-u32 btrfs_crc32c(u32, const void *, size_t);
-u32 btrfs_csum_data(char *, u32, size_t);
-void btrfs_csum_final(u32, void *);
-
-static inline u64 btrfs_name_hash(const char *name, int len)
-{
-	return btrfs_crc32c((u32) ~1, name, len);
-}
-
-/* dev.c */
-extern struct blk_desc *btrfs_blk_desc;
-extern struct disk_partition *btrfs_part_info;
-
-int btrfs_devread(u64, int, void *);
-
-/* chunk-map.c */
-u64 btrfs_map_logical_to_physical(u64);
-int btrfs_chunk_map_init(void);
-void btrfs_chunk_map_exit(void);
-int btrfs_read_chunk_tree(void);
+extern struct btrfs_fs_info *current_fs_info;
 
 /* compression.c */
 u32 btrfs_decompress(u8 type, const char *, u32, char *, u32);
 
-/* super.c */
-int btrfs_read_superblock(void);
-
-/* dir-item.c */
-typedef int (*btrfs_readdir_callback_t)(const struct btrfs_root *,
-					struct btrfs_dir_item *);
-
-int btrfs_lookup_dir_item(const struct btrfs_root *, u64, const char *, int,
-			   struct btrfs_dir_item *);
-int btrfs_readdir(const struct btrfs_root *, u64, btrfs_readdir_callback_t);
-
-/* root.c */
-int btrfs_find_root(u64, struct btrfs_root *, struct btrfs_root_item *);
-u64 btrfs_lookup_root_ref(u64, struct btrfs_root_ref *, char *);
-
 /* inode.c */
-u64 btrfs_lookup_inode_ref(struct btrfs_root *, u64, struct btrfs_inode_ref *,
-			    char *);
-int btrfs_lookup_inode(const struct btrfs_root *, struct btrfs_key *,
-		        struct btrfs_inode_item *, struct btrfs_root *);
-int btrfs_readlink(const struct btrfs_root *, u64, char *);
-u64 btrfs_lookup_path(struct btrfs_root *, u64, const char *, u8 *,
-		       struct btrfs_inode_item *, int);
-u64 btrfs_file_read(const struct btrfs_root *, u64, u64, u64, char *);
+int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target);
+int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
+		    char *dest);
 
 /* subvolume.c */
 u64 btrfs_get_default_subvol_objectid(void);
 
-/* extent-io.c */
-u64 btrfs_read_extent_inline(struct btrfs_path *,
-			      struct btrfs_file_extent_item *, u64, u64,
-			      char *);
-u64 btrfs_read_extent_reg(struct btrfs_path *, struct btrfs_file_extent_item *,
-			   u64, u64, char *);
-
 #endif /* !__BTRFS_BTRFS_H__ */
diff --git a/fs/btrfs/btrfs_tree.h b/fs/btrfs/btrfs_tree.h
deleted file mode 100644
index aa0f3d6c86d..00000000000
--- a/fs/btrfs/btrfs_tree.h
+++ /dev/null
@@ -1,766 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * From linux/include/uapi/linux/btrfs_tree.h
- */
-
-#ifndef __BTRFS_BTRFS_TREE_H__
-#define __BTRFS_BTRFS_TREE_H__
-
-#include <common.h>
-
-#define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_NAME_MAX 255
-#define BTRFS_LABEL_SIZE 256
-#define BTRFS_FSID_SIZE 16
-#define BTRFS_UUID_SIZE 16
-
-/*
- * This header contains the structure definitions and constants used
- * by file system objects that can be retrieved using
- * the BTRFS_IOC_SEARCH_TREE ioctl.  That means basically anything that
- * is needed to describe a leaf node's key or item contents.
- */
-
-/* holds pointers to all of the tree roots */
-#define BTRFS_ROOT_TREE_OBJECTID 1ULL
-
-/* stores information about which extents are in use, and reference counts */
-#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
-
-/*
- * chunk tree stores translations from logical -> physical block numbering
- * the super block points to the chunk tree
- */
-#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
-
-/*
- * stores information about which areas of a given device are in use.
- * one per device.  The tree of tree roots points to the device tree
- */
-#define BTRFS_DEV_TREE_OBJECTID 4ULL
-
-/* one per subvolume, storing files and directories */
-#define BTRFS_FS_TREE_OBJECTID 5ULL
-
-/* directory objectid inside the root tree */
-#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
-
-/* holds checksums of all the data extents */
-#define BTRFS_CSUM_TREE_OBJECTID 7ULL
-
-/* holds quota configuration and tracking */
-#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
-
-/* for storing items that use the BTRFS_UUID_KEY* types */
-#define BTRFS_UUID_TREE_OBJECTID 9ULL
-
-/* tracks free space in block groups. */
-#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
-
-/* device stats in the device tree */
-#define BTRFS_DEV_STATS_OBJECTID 0ULL
-
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
-/* orhpan objectid for tracking unlinked/truncated files */
-#define BTRFS_ORPHAN_OBJECTID -5ULL
-
-/* does write ahead logging to speed up fsyncs */
-#define BTRFS_TREE_LOG_OBJECTID -6ULL
-#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
-
-/* for space balancing */
-#define BTRFS_TREE_RELOC_OBJECTID -8ULL
-#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
-
-/*
- * extent checksums all have this objectid
- * this allows them to share the logging tree
- * for fsyncs
- */
-#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
-
-/* For storing free space cache */
-#define BTRFS_FREE_SPACE_OBJECTID -11ULL
-
-/*
- * The inode number assigned to the special inode for storing
- * free ino cache
- */
-#define BTRFS_FREE_INO_OBJECTID -12ULL
-
-/* dummy objectid represents multiple objectids */
-#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
-
-/*
- * All files have objectids in this range.
- */
-#define BTRFS_FIRST_FREE_OBJECTID 256ULL
-#define BTRFS_LAST_FREE_OBJECTID -256ULL
-#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
-
-
-/*
- * the device items go into the chunk tree.  The key is in the form
- * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
- */
-#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
-
-#define BTRFS_BTREE_INODE_OBJECTID 1
-
-#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
-
-#define BTRFS_DEV_REPLACE_DEVID 0ULL
-
-/*
- * inode items have the data typically returned from stat and store other
- * info about object characteristics.  There is one for every file and dir in
- * the FS
- */
-#define BTRFS_INODE_ITEM_KEY		1
-#define BTRFS_INODE_REF_KEY		12
-#define BTRFS_INODE_EXTREF_KEY		13
-#define BTRFS_XATTR_ITEM_KEY		24
-#define BTRFS_ORPHAN_ITEM_KEY		48
-/* reserve 2-15 close to the inode for later flexibility */
-
-/*
- * dir items are the name -> inode pointers in a directory.  There is one
- * for every name in a directory.
- */
-#define BTRFS_DIR_LOG_ITEM_KEY  60
-#define BTRFS_DIR_LOG_INDEX_KEY 72
-#define BTRFS_DIR_ITEM_KEY	84
-#define BTRFS_DIR_INDEX_KEY	96
-/*
- * extent data is for file data
- */
-#define BTRFS_EXTENT_DATA_KEY	108
-
-/*
- * extent csums are stored in a separate tree and hold csums for
- * an entire extent on disk.
- */
-#define BTRFS_EXTENT_CSUM_KEY	128
-
-/*
- * root items point to tree roots.  They are typically in the root
- * tree used by the super block to find all the other trees
- */
-#define BTRFS_ROOT_ITEM_KEY	132
-
-/*
- * root backrefs tie subvols and snapshots to the directory entries that
- * reference them
- */
-#define BTRFS_ROOT_BACKREF_KEY	144
-
-/*
- * root refs make a fast index for listing all of the snapshots and
- * subvolumes referenced by a given root.  They point directly to the
- * directory item in the root that references the subvol
- */
-#define BTRFS_ROOT_REF_KEY	156
-
-/*
- * extent items are in the extent map tree.  These record which blocks
- * are used, and how many references there are to each block
- */
-#define BTRFS_EXTENT_ITEM_KEY	168
-
-/*
- * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
- * the length, so we save the level in key->offset instead of the length.
- */
-#define BTRFS_METADATA_ITEM_KEY	169
-
-#define BTRFS_TREE_BLOCK_REF_KEY	176
-
-#define BTRFS_EXTENT_DATA_REF_KEY	178
-
-#define BTRFS_EXTENT_REF_V0_KEY		180
-
-#define BTRFS_SHARED_BLOCK_REF_KEY	182
-
-#define BTRFS_SHARED_DATA_REF_KEY	184
-
-/*
- * block groups give us hints into the extent allocation trees.  Which
- * blocks are free etc etc
- */
-#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
-
-/*
- * Every block group is represented in the free space tree by a free space info
- * item, which stores some accounting information. It is keyed on
- * (block_group_start, FREE_SPACE_INFO, block_group_length).
- */
-#define BTRFS_FREE_SPACE_INFO_KEY 198
-
-/*
- * A free space extent tracks an extent of space that is free in a block group.
- * It is keyed on (start, FREE_SPACE_EXTENT, length).
- */
-#define BTRFS_FREE_SPACE_EXTENT_KEY 199
-
-/*
- * When a block group becomes very fragmented, we convert it to use bitmaps
- * instead of extents. A free space bitmap is keyed on
- * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
- * (length / sectorsize) bits.
- */
-#define BTRFS_FREE_SPACE_BITMAP_KEY 200
-
-#define BTRFS_DEV_EXTENT_KEY	204
-#define BTRFS_DEV_ITEM_KEY	216
-#define BTRFS_CHUNK_ITEM_KEY	228
-
-/*
- * Records the overall state of the qgroups.
- * There's only one instance of this key present,
- * (0, BTRFS_QGROUP_STATUS_KEY, 0)
- */
-#define BTRFS_QGROUP_STATUS_KEY         240
-/*
- * Records the currently used space of the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_INFO_KEY           242
-/*
- * Contains the user configured limits for the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_LIMIT_KEY          244
-/*
- * Records the child-parent relationship of qgroups. For
- * each relation, 2 keys are present:
- * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
- * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
- */
-#define BTRFS_QGROUP_RELATION_KEY       246
-
-/*
- * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
- */
-#define BTRFS_BALANCE_ITEM_KEY	248
-
-/*
- * The key type for tree items that are stored persistently, but do not need to
- * exist for extended period of time. The items can exist in any tree.
- *
- * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
- *
- * Existing items:
- *
- * - balance status item
- *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
- */
-#define BTRFS_TEMPORARY_ITEM_KEY	248
-
-/*
- * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
- */
-#define BTRFS_DEV_STATS_KEY		249
-
-/*
- * The key type for tree items that are stored persistently and usually exist
- * for a long period, eg. filesystem lifetime. The item kinds can be status
- * information, stats or preference values. The item can exist in any tree.
- *
- * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
- *
- * Existing items:
- *
- * - device statistics, store IO stats in the device tree, one key for all
- *   stats
- *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
- */
-#define BTRFS_PERSISTENT_ITEM_KEY	249
-
-/*
- * Persistantly stores the device replace state in the device tree.
- * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
- */
-#define BTRFS_DEV_REPLACE_KEY	250
-
-/*
- * Stores items that allow to quickly map UUIDs to something else.
- * These items are part of the filesystem UUID tree.
- * The key is built like this:
- * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
- */
-#if BTRFS_UUID_SIZE != 16
-#error "UUID items require BTRFS_UUID_SIZE == 16!"
-#endif
-#define BTRFS_UUID_KEY_SUBVOL	251	/* for UUIDs assigned to subvols */
-#define BTRFS_UUID_KEY_RECEIVED_SUBVOL	252	/* for UUIDs assigned to
-						 * received subvols */
-
-/*
- * string items are for debugging.  They just store a short string of
- * data in the FS
- */
-#define BTRFS_STRING_ITEM_KEY	253
-
-
-
-/* 32 bytes in various csum fields */
-#define BTRFS_CSUM_SIZE 32
-
-/* csum types */
-#define BTRFS_CSUM_TYPE_CRC32	0
-
-/*
- * flags definitions for directory entry item type
- *
- * Used by:
- * struct btrfs_dir_item.type
- */
-#define BTRFS_FT_UNKNOWN	0
-#define BTRFS_FT_REG_FILE	1
-#define BTRFS_FT_DIR		2
-#define BTRFS_FT_CHRDEV		3
-#define BTRFS_FT_BLKDEV		4
-#define BTRFS_FT_FIFO		5
-#define BTRFS_FT_SOCK		6
-#define BTRFS_FT_SYMLINK	7
-#define BTRFS_FT_XATTR		8
-#define BTRFS_FT_MAX		9
-
-/*
- * The key defines the order in the tree, and so it also defines (optimal)
- * block layout.
- *
- * objectid corresponds to the inode number.
- *
- * type tells us things about the object, and is a kind of stream selector.
- * so for a given inode, keys with type of 1 might refer to the inode data,
- * type of 2 may point to file data in the btree and type == 3 may point to
- * extents.
- *
- * offset is the starting byte offset for this key in the stream.
- */
-
-struct btrfs_key {
-	__u64 objectid;
-	__u8 type;
-	__u64 offset;
-} __attribute__ ((__packed__));
-
-struct btrfs_dev_item {
-	/* the internal btrfs device id */
-	__u64 devid;
-
-	/* size of the device */
-	__u64 total_bytes;
-
-	/* bytes used */
-	__u64 bytes_used;
-
-	/* optimal io alignment for this device */
-	__u32 io_align;
-
-	/* optimal io width for this device */
-	__u32 io_width;
-
-	/* minimal io size for this device */
-	__u32 sector_size;
-
-	/* type and info about this device */
-	__u64 type;
-
-	/* expected generation for this device */
-	__u64 generation;
-
-	/*
-	 * starting byte of this partition on the device,
-	 * to allow for stripe alignment in the future
-	 */
-	__u64 start_offset;
-
-	/* grouping information for allocation decisions */
-	__u32 dev_group;
-
-	/* seek speed 0-100 where 100 is fastest */
-	__u8 seek_speed;
-
-	/* bandwidth 0-100 where 100 is fastest */
-	__u8 bandwidth;
-
-	/* btrfs generated uuid for this device */
-	__u8 uuid[BTRFS_UUID_SIZE];
-
-	/* uuid of FS who owns this device */
-	__u8 fsid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_stripe {
-	__u64 devid;
-	__u64 offset;
-	__u8 dev_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_chunk {
-	/* size of this chunk in bytes */
-	__u64 length;
-
-	/* objectid of the root referencing this chunk */
-	__u64 owner;
-
-	__u64 stripe_len;
-	__u64 type;
-
-	/* optimal io alignment for this chunk */
-	__u32 io_align;
-
-	/* optimal io width for this chunk */
-	__u32 io_width;
-
-	/* minimal io size for this chunk */
-	__u32 sector_size;
-
-	/* 2^16 stripes is quite a lot, a second limit is the size of a single
-	 * item in the btree
-	 */
-	__u16 num_stripes;
-
-	/* sub stripes only matter for raid10 */
-	__u16 sub_stripes;
-	struct btrfs_stripe stripe;
-	/* additional stripes go here */
-} __attribute__ ((__packed__));
-
-#define BTRFS_FREE_SPACE_EXTENT	1
-#define BTRFS_FREE_SPACE_BITMAP	2
-
-struct btrfs_free_space_entry {
-	__u64 offset;
-	__u64 bytes;
-	__u8 type;
-} __attribute__ ((__packed__));
-
-struct btrfs_free_space_header {
-	struct btrfs_key location;
-	__u64 generation;
-	__u64 num_entries;
-	__u64 num_bitmaps;
-} __attribute__ ((__packed__));
-
-#define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
-#define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
-
-/* Super block flags */
-/* Errors detected */
-#define BTRFS_SUPER_FLAG_ERROR		(1ULL << 2)
-
-#define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
-#define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
-
-
-/*
- * items in the extent btree are used to record the objectid of the
- * owner of the block and the number of references
- */
-
-struct btrfs_extent_item {
-	__u64 refs;
-	__u64 generation;
-	__u64 flags;
-} __attribute__ ((__packed__));
-
-
-#define BTRFS_EXTENT_FLAG_DATA		(1ULL << 0)
-#define BTRFS_EXTENT_FLAG_TREE_BLOCK	(1ULL << 1)
-
-/* following flags only apply to tree blocks */
-
-/* use full backrefs for extent pointers in the block */
-#define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
-
-/*
- * this flag is only used internally by scrub and may be changed at any time
- * it is only declared here to avoid collisions
- */
-#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
-
-struct btrfs_tree_block_info {
-	struct btrfs_key key;
-	__u8 level;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_data_ref {
-	__u64 root;
-	__u64 objectid;
-	__u64 offset;
-	__u32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_shared_data_ref {
-	__u32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_inline_ref {
-	__u8 type;
-	__u64 offset;
-} __attribute__ ((__packed__));
-
-/* dev extents record free space on individual devices.  The owner
- * field points back to the chunk allocation mapping tree that allocated
- * the extent.  The chunk tree uuid field is a way to double check the owner
- */
-struct btrfs_dev_extent {
-	__u64 chunk_tree;
-	__u64 chunk_objectid;
-	__u64 chunk_offset;
-	__u64 length;
-	__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_ref {
-	__u64 index;
-	__u16 name_len;
-	/* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_extref {
-	__u64 parent_objectid;
-	__u64 index;
-	__u16 name_len;
-	__u8   name[0];
-	/* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_timespec {
-	__u64 sec;
-	__u32 nsec;
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_item {
-	/* nfs style generation number */
-	__u64 generation;
-	/* transid that last touched this inode */
-	__u64 transid;
-	__u64 size;
-	__u64 nbytes;
-	__u64 block_group;
-	__u32 nlink;
-	__u32 uid;
-	__u32 gid;
-	__u32 mode;
-	__u64 rdev;
-	__u64 flags;
-
-	/* modification sequence number for NFS */
-	__u64 sequence;
-
-	/*
-	 * a little future expansion, for more than this we can
-	 * just grow the inode item and version it
-	 */
-	__u64 reserved[4];
-	struct btrfs_timespec atime;
-	struct btrfs_timespec ctime;
-	struct btrfs_timespec mtime;
-	struct btrfs_timespec otime;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_log_item {
-	__u64 end;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_item {
-	struct btrfs_key location;
-	__u64 transid;
-	__u16 data_len;
-	__u16 name_len;
-	__u8 type;
-} __attribute__ ((__packed__));
-
-#define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)
-
-/*
- * Internal in-memory flag that a subvolume has been marked for deletion but
- * still visible as a directory
- */
-#define BTRFS_ROOT_SUBVOL_DEAD		(1ULL << 48)
-
-struct btrfs_root_item {
-	struct btrfs_inode_item inode;
-	__u64 generation;
-	__u64 root_dirid;
-	__u64 bytenr;
-	__u64 byte_limit;
-	__u64 bytes_used;
-	__u64 last_snapshot;
-	__u64 flags;
-	__u32 refs;
-	struct btrfs_key drop_progress;
-	__u8 drop_level;
-	__u8 level;
-
-	/*
-	 * The following fields appear after subvol_uuids+subvol_times
-	 * were introduced.
-	 */
-
-	/*
-	 * This generation number is used to test if the new fields are valid
-	 * and up to date while reading the root item. Every time the root item
-	 * is written out, the "generation" field is copied into this field. If
-	 * anyone ever mounted the fs with an older kernel, we will have
-	 * mismatching generation values here and thus must invalidate the
-	 * new fields. See btrfs_update_root and btrfs_find_last_root for
-	 * details.
-	 * the offset of generation_v2 is also used as the start for the memset
-	 * when invalidating the fields.
-	 */
-	__u64 generation_v2;
-	__u8 uuid[BTRFS_UUID_SIZE];
-	__u8 parent_uuid[BTRFS_UUID_SIZE];
-	__u8 received_uuid[BTRFS_UUID_SIZE];
-	__u64 ctransid; /* updated when an inode changes */
-	__u64 otransid; /* trans when created */
-	__u64 stransid; /* trans when sent. non-zero for received subvol */
-	__u64 rtransid; /* trans when received. non-zero for received subvol */
-	struct btrfs_timespec ctime;
-	struct btrfs_timespec otime;
-	struct btrfs_timespec stime;
-	struct btrfs_timespec rtime;
-	__u64 reserved[8]; /* for future */
-} __attribute__ ((__packed__));
-
-/*
- * this is used for both forward and backward root refs
- */
-struct btrfs_root_ref {
-	__u64 dirid;
-	__u64 sequence;
-	__u16 name_len;
-} __attribute__ ((__packed__));
-
-#define BTRFS_FILE_EXTENT_INLINE 0
-#define BTRFS_FILE_EXTENT_REG 1
-#define BTRFS_FILE_EXTENT_PREALLOC 2
-
-enum btrfs_compression_type {
-	BTRFS_COMPRESS_NONE  = 0,
-	BTRFS_COMPRESS_ZLIB  = 1,
-	BTRFS_COMPRESS_LZO   = 2,
-	BTRFS_COMPRESS_ZSTD  = 3,
-	BTRFS_COMPRESS_TYPES = 3,
-	BTRFS_COMPRESS_LAST  = 4,
-};
-
-struct btrfs_file_extent_item {
-	/*
-	 * transaction id that created this extent
-	 */
-	__u64 generation;
-	/*
-	 * max number of bytes to hold this extent in ram
-	 * when we split a compressed extent we can't know how big
-	 * each of the resulting pieces will be.  So, this is
-	 * an upper limit on the size of the extent in ram instead of
-	 * an exact limit.
-	 */
-	__u64 ram_bytes;
-
-	/*
-	 * 32 bits for the various ways we might encode the data,
-	 * including compression and encryption.  If any of these
-	 * are set to something a given disk format doesn't understand
-	 * it is treated like an incompat flag for reading and writing,
-	 * but not for stat.
-	 */
-	__u8 compression;
-	__u8 encryption;
-	__u16 other_encoding; /* spare for later use */
-
-	/* are we inline data or a real extent? */
-	__u8 type;
-
-	/*
-	 * disk space consumed by the extent, checksum blocks are included
-	 * in these numbers
-	 *
-	 * At this offset in the structure, the inline extent data start.
-	 */
-	__u64 disk_bytenr;
-	__u64 disk_num_bytes;
-	/*
-	 * the logical offset in file blocks (no csums)
-	 * this extent record is for.  This allows a file extent to point
-	 * into the middle of an existing extent on disk, sharing it
-	 * between two snapshots (useful if some bytes in the middle of the
-	 * extent have changed
-	 */
-	__u64 offset;
-	/*
-	 * the logical number of file blocks (no csums included).  This
-	 * always reflects the size uncompressed and without encoding.
-	 */
-	__u64 num_bytes;
-
-} __attribute__ ((__packed__));
-
-struct btrfs_csum_item {
-	__u8 csum;
-} __attribute__ ((__packed__));
-
-/* different types of block groups (and chunks) */
-#define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
-#define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
-#define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
-#define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
-#define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
-#define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
-#define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
-#define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
-#define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
-					 BTRFS_SPACE_INFO_GLOBAL_RSV)
-
-enum btrfs_raid_types {
-	BTRFS_RAID_RAID10,
-	BTRFS_RAID_RAID1,
-	BTRFS_RAID_DUP,
-	BTRFS_RAID_RAID0,
-	BTRFS_RAID_SINGLE,
-	BTRFS_RAID_RAID5,
-	BTRFS_RAID_RAID6,
-	BTRFS_NR_RAID_TYPES
-};
-
-#define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
-					 BTRFS_BLOCK_GROUP_SYSTEM |  \
-					 BTRFS_BLOCK_GROUP_METADATA)
-
-#define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
-					 BTRFS_BLOCK_GROUP_RAID1 |   \
-					 BTRFS_BLOCK_GROUP_RAID5 |   \
-					 BTRFS_BLOCK_GROUP_RAID6 |   \
-					 BTRFS_BLOCK_GROUP_DUP |     \
-					 BTRFS_BLOCK_GROUP_RAID10)
-#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
-					 BTRFS_BLOCK_GROUP_RAID6)
-
-/*
- * We need a bit for restriper to be able to tell when chunks of type
- * SINGLE are available.  This "extended" profile format is used in
- * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
- * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
- * to avoid remappings between two formats in future.
- */
-#define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)
-
-/*
- * A fake block group type that is used to communicate global block reserve
- * size to userspace via the SPACE_INFO ioctl.
- */
-#define BTRFS_SPACE_INFO_GLOBAL_RSV	(1ULL << 49)
-
-#define BTRFS_EXTENDED_PROFILE_MASK	(BTRFS_BLOCK_GROUP_PROFILE_MASK | \
-					 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
-
-#endif /* __BTRFS_BTRFS_TREE_H__ */
diff --git a/fs/btrfs/chunk-map.c b/fs/btrfs/chunk-map.c
deleted file mode 100644
index 2e5be650672..00000000000
--- a/fs/btrfs/chunk-map.c
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-#include <log.h>
-#include <malloc.h>
-
-struct chunk_map_item {
-	struct rb_node node;
-	u64 logical;
-	u64 length;
-	u64 physical;
-};
-
-static int add_chunk_mapping(struct btrfs_key *key, struct btrfs_chunk *chunk)
-{
-	struct btrfs_stripe *stripe;
-	u64 block_profile = chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
-	struct rb_node **new = &(btrfs_info.chunks_root.rb_node), *prnt = NULL;
-	struct chunk_map_item *map_item;
-
-	if (block_profile && block_profile != BTRFS_BLOCK_GROUP_DUP) {
-		printf("%s: unsupported chunk profile %llu\n", __func__,
-		       block_profile);
-		return -1;
-	} else if (!chunk->length) {
-		printf("%s: zero length chunk\n", __func__);
-		return -1;
-	}
-
-	stripe = &chunk->stripe;
-	btrfs_stripe_to_cpu(stripe);
-
-	while (*new) {
-		struct chunk_map_item *this;
-
-		this = rb_entry(*new, struct chunk_map_item, node);
-
-		prnt = *new;
-		if (key->offset < this->logical) {
-			new = &((*new)->rb_left);
-		} else if (key->offset > this->logical) {
-			new = &((*new)->rb_right);
-		} else {
-			debug("%s: Logical address %llu already in map!\n",
-			      __func__, key->offset);
-			return 0;
-		}
-	}
-
-	map_item = malloc(sizeof(struct chunk_map_item));
-	if (!map_item)
-		return -1;
-
-	map_item->logical = key->offset;
-	map_item->length = chunk->length;
-	map_item->physical = le64_to_cpu(chunk->stripe.offset);
-	rb_link_node(&map_item->node, prnt, new);
-	rb_insert_color(&map_item->node, &btrfs_info.chunks_root);
-
-	debug("%s: Mapping %llu to %llu\n", __func__, map_item->logical,
-	      map_item->physical);
-
-	return 0;
-}
-
-u64 btrfs_map_logical_to_physical(u64 logical)
-{
-	struct rb_node *node = btrfs_info.chunks_root.rb_node;
-
-	while (node) {
-		struct chunk_map_item *item;
-
-		item = rb_entry(node, struct chunk_map_item, node);
-
-		if (item->logical > logical)
-			node = node->rb_left;
-		else if (logical >= item->logical + item->length)
-			node = node->rb_right;
-		else
-			return item->physical + logical - item->logical;
-	}
-
-	printf("%s: Cannot map logical address %llu to physical\n", __func__,
-	       logical);
-
-	return -1ULL;
-}
-
-void btrfs_chunk_map_exit(void)
-{
-	struct rb_node *now, *next;
-	struct chunk_map_item *item;
-
-	for (now = rb_first_postorder(&btrfs_info.chunks_root); now; now = next)
-	{
-		item = rb_entry(now, struct chunk_map_item, node);
-		next = rb_next_postorder(now);
-		free(item);
-	}
-}
-
-int btrfs_chunk_map_init(void)
-{
-	u8 sys_chunk_array_copy[sizeof(btrfs_info.sb.sys_chunk_array)];
-	u8 * const start = sys_chunk_array_copy;
-	u8 * const end = start + btrfs_info.sb.sys_chunk_array_size;
-	u8 *cur;
-	struct btrfs_key *key;
-	struct btrfs_chunk *chunk;
-
-	btrfs_info.chunks_root = RB_ROOT;
-
-	memcpy(sys_chunk_array_copy, btrfs_info.sb.sys_chunk_array,
-	       sizeof(sys_chunk_array_copy));
-
-	for (cur = start; cur < end;) {
-		key = (struct btrfs_key *) cur;
-		cur += sizeof(struct btrfs_key);
-		chunk = (struct btrfs_chunk *) cur;
-
-		btrfs_key_to_cpu(key);
-		btrfs_chunk_to_cpu(chunk);
-
-		if (key->type != BTRFS_CHUNK_ITEM_KEY) {
-			printf("%s: invalid key type %u\n", __func__,
-			       key->type);
-			return -1;
-		}
-
-		if (add_chunk_mapping(key, chunk))
-			return -1;
-
-		cur += sizeof(struct btrfs_chunk);
-		cur += sizeof(struct btrfs_stripe) * (chunk->num_stripes - 1);
-	}
-
-	return 0;
-}
-
-int btrfs_read_chunk_tree(void)
-{
-	struct btrfs_path path;
-	struct btrfs_key key, *found_key;
-	struct btrfs_chunk *chunk;
-	int res = 0;
-
-	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
-	key.type = BTRFS_CHUNK_ITEM_KEY;
-	key.offset = 0;
-
-	if (btrfs_search_tree(&btrfs_info.chunk_root, &key, &path))
-		return -1;
-
-	do {
-		found_key = btrfs_path_leaf_key(&path);
-		if (btrfs_comp_keys_type(&key, found_key))
-			continue;
-
-		chunk = btrfs_path_item_ptr(&path, struct btrfs_chunk);
-		btrfs_chunk_to_cpu(chunk);
-		if (add_chunk_mapping(found_key, chunk)) {
-			res = -1;
-			break;
-		}
-	} while (!(res = btrfs_next_slot(&path)));
-
-	btrfs_free_path(&path);
-
-	if (res < 0)
-		return -1;
-
-	return 0;
-}
diff --git a/fs/btrfs/common/rbtree-utils.c b/fs/btrfs/common/rbtree-utils.c
new file mode 100644
index 00000000000..7a7d7e84e6a
--- /dev/null
+++ b/fs/btrfs/common/rbtree-utils.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/errno.h>
+#include "rbtree-utils.h"
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+	      rb_compare_nodes comp)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	int ret;
+
+	while(*p) {
+		parent = *p;
+
+		ret = comp(parent, node);
+		if (ret < 0)
+			p = &(*p)->rb_left;
+		else if (ret > 0)
+			p = &(*p)->rb_right;
+		else
+			return -EEXIST;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return 0;
+}
+
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+			  struct rb_node **next_ret)
+{
+	struct rb_node *n = root->rb_node;
+	struct rb_node *parent = NULL;
+	int ret = 0;
+
+	while(n) {
+		parent = n;
+
+		ret = comp(n, key);
+		if (ret < 0)
+			n = n->rb_left;
+		else if (ret > 0)
+			n = n->rb_right;
+		else
+			return n;
+	}
+
+	if (!next_ret)
+		return NULL;
+
+	if (parent && ret > 0)
+		parent = rb_next(parent);
+
+	*next_ret = parent;
+	return NULL;
+}
+
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node)
+{
+	struct rb_node *node;
+
+	while ((node = rb_first(root))) {
+		rb_erase(node, root);
+		free_node(node);
+	}
+}
diff --git a/fs/btrfs/common/rbtree-utils.h b/fs/btrfs/common/rbtree-utils.h
new file mode 100644
index 00000000000..d977cfd9554
--- /dev/null
+++ b/fs/btrfs/common/rbtree-utils.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __RBTREE_UTILS__
+#define __RBTREE_UTILS__
+
+#include <linux/rbtree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The common insert/search/free functions */
+typedef int (*rb_compare_nodes)(struct rb_node *node1, struct rb_node *node2);
+typedef int (*rb_compare_keys)(struct rb_node *node, void *key);
+typedef void (*rb_free_node)(struct rb_node *node);
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+	      rb_compare_nodes comp);
+/*
+ * In some cases, we need return the next node if we don't find the node we
+ * specify. At this time, we can use next_ret.
+ */
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+			  struct rb_node **next_ret);
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node);
+
+#define FREE_RB_BASED_TREE(name, free_func)		\
+static void free_##name##_tree(struct rb_root *root)	\
+{							\
+	rb_free_nodes(root, free_func);			\
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644
index 00000000000..9cf8a10c76c
--- /dev/null
+++ b/fs/btrfs/compat.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_COMPAT_H__
+#define __BTRFS_COMPAT_H__
+
+#include <linux/errno.h>
+#include <fs_internal.h>
+#include <uuid.h>
+
+/* Provide a compatibility layer to make code syncing easier */
+
+/* A simple wraper to for error() used in btrfs-progs */
+#define error(fmt, ...)		pr_err("BTRFS: " fmt "\n", ##__VA_ARGS__)
+
+#define ASSERT(c) assert(c)
+
+#define BTRFS_UUID_UNPARSED_SIZE	37
+
+/* No <linux/limits.h> so have to define it here */
+#define XATTR_NAME_MAX		255
+#define PATH_MAX		4096
+
+/*
+ * Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+/*
+ * Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define get_unaligned_le8(p) (*((u8 *)(p)))
+#define get_unaligned_8(p) (*((u8 *)(p)))
+#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val))
+#define put_unaligned_8(val,p) ((*((u8 *)(p))) = (val))
+
+/*
+ * Read data from device specified by @desc and @part
+ *
+ * U-boot equivalent of pread().
+ *
+ * Return the bytes of data read.
+ * Return <0 for error.
+ */
+static inline int __btrfs_devread(struct blk_desc *desc,
+				  struct disk_partition *part,
+				  void *buf, size_t size, u64 offset)
+{
+	lbaint_t sector;
+	int byte_offset;
+	int ret;
+
+	sector = offset >> desc->log2blksz;
+	byte_offset = offset % desc->blksz;
+
+	/* fs_devread() return 0 for error, >0 for success */
+	ret = fs_devread(desc, part, sector, byte_offset, size, buf);
+	if (!ret)
+		return -EIO;
+	return size;
+}
+
+static inline void uuid_unparse(const u8 *uuid, char *out)
+{
+	return uuid_bin_to_str((unsigned char *)uuid, out, 0);
+}
+
+static inline int is_power_of_2(unsigned long n)
+{
+	return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 59e4a94cb2e..23efefa1997 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -115,7 +115,7 @@ static u32 decompress_zlib(const u8 *_cbuf, u32 clen, u8 *dbuf, u32 dlen)
 	while (stream.total_in < clen) {
 		stream.next_in = cbuf + stream.total_in;
 		stream.avail_in = min((u32) (clen - stream.total_in),
-				      (u32) btrfs_info.sb.sectorsize);
+					current_fs_info->sectorsize);
 
 		ret = inflate(&stream, Z_NO_FLUSH);
 		if (ret != Z_OK)
diff --git a/fs/btrfs/crypto/hash.c b/fs/btrfs/crypto/hash.c
new file mode 100644
index 00000000000..fb51f6386cb
--- /dev/null
+++ b/fs/btrfs/crypto/hash.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/xxhash.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/types.h>
+#include <u-boot/sha256.h>
+#include <u-boot/crc.h>
+
+static u32 btrfs_crc32c_table[256];
+
+void btrfs_hash_init(void)
+{
+	static int inited = 0;
+
+	if (!inited) {
+		crc32c_init(btrfs_crc32c_table, 0x82F63B78);
+		inited = 1;
+	}
+}
+
+int hash_sha256(const u8 *buf, size_t length, u8 *out)
+{
+	sha256_context ctx;
+
+	sha256_starts(&ctx);
+	sha256_update(&ctx, buf, length);
+	sha256_finish(&ctx, out);
+
+	return 0;
+}
+
+int hash_xxhash(const u8 *buf, size_t length, u8 *out)
+{
+	u64 hash;
+
+	hash = xxh64(buf, length, 0);
+	put_unaligned_le64(hash, out);
+
+	return 0;
+}
+
+int hash_crc32c(const u8 *buf, size_t length, u8 *out)
+{
+	u32 crc;
+
+	crc = crc32c_cal((u32)~0, (char *)buf, length, btrfs_crc32c_table);
+	put_unaligned_le32(~crc, out);
+
+	return 0;
+}
+
+u32 crc32c(u32 seed, const void * data, size_t len)
+{
+	return crc32c_cal(seed, data, len, btrfs_crc32c_table);
+}
diff --git a/fs/btrfs/crypto/hash.h b/fs/btrfs/crypto/hash.h
new file mode 100644
index 00000000000..d1ba1fa374e
--- /dev/null
+++ b/fs/btrfs/crypto/hash.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTO_HASH_H
+#define CRYPTO_HASH_H
+
+#include <linux/types.h>
+
+#define CRYPTO_HASH_SIZE_MAX	32
+
+void btrfs_hash_init(void);
+int hash_crc32c(const u8 *buf, size_t length, u8 *out);
+int hash_xxhash(const u8 *buf, size_t length, u8 *out);
+int hash_sha256(const u8 *buf, size_t length, u8 *out);
+
+u32 crc32c(u32 seed, const void * data, size_t len);
+
+/* Blake2B is not yet supported due to lack of library */
+
+#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 28f98d43ada..5ffced91606 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5,287 +5,738 @@
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
+#include <linux/kernel.h>
 #include <log.h>
 #include <malloc.h>
 #include <memalign.h>
+#include "btrfs.h"
+#include "disk-io.h"
+
+static const struct btrfs_csum {
+	u16 size;
+	const char name[14];
+} btrfs_csums[] = {
+	[BTRFS_CSUM_TYPE_CRC32]		= {  4, "crc32c" },
+	[BTRFS_CSUM_TYPE_XXHASH]	= {  8, "xxhash64" },
+	[BTRFS_CSUM_TYPE_SHA256]	= { 32, "sha256" },
+	[BTRFS_CSUM_TYPE_BLAKE2]	= { 32, "blake2" },
+};
+
+u16 btrfs_super_csum_size(const struct btrfs_super_block *sb)
+{
+	const u16 csum_type = btrfs_super_csum_type(sb);
+
+	return btrfs_csums[csum_type].size;
+}
+
+const char *btrfs_super_csum_name(u16 csum_type)
+{
+	return btrfs_csums[csum_type].name;
+}
+
+size_t btrfs_super_num_csums(void)
+{
+	return ARRAY_SIZE(btrfs_csums);
+}
+
+u16 btrfs_csum_type_size(u16 csum_type)
+{
+	return btrfs_csums[csum_type].size;
+}
+
+struct btrfs_path *btrfs_alloc_path(void)
+{
+	struct btrfs_path *path;
+	path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS);
+	return path;
+}
+
+void btrfs_free_path(struct btrfs_path *p)
+{
+	if (!p)
+		return;
+	btrfs_release_path(p);
+	kfree(p);
+}
+
+void btrfs_release_path(struct btrfs_path *p)
+{
+	int i;
+	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+		if (!p->nodes[i])
+			continue;
+		free_extent_buffer(p->nodes[i]);
+	}
+	memset(p, 0, sizeof(*p));
+}
 
-int btrfs_comp_keys(struct btrfs_key *a, struct btrfs_key *b)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
 {
-	if (a->objectid > b->objectid)
+	if (k1->objectid > k2->objectid)
 		return 1;
-	if (a->objectid < b->objectid)
+	if (k1->objectid < k2->objectid)
 		return -1;
-	if (a->type > b->type)
+	if (k1->type > k2->type)
 		return 1;
-	if (a->type < b->type)
+	if (k1->type < k2->type)
 		return -1;
-	if (a->offset > b->offset)
+	if (k1->offset > k2->offset)
 		return 1;
-	if (a->offset < b->offset)
+	if (k1->offset < k2->offset)
 		return -1;
 	return 0;
 }
 
-int btrfs_comp_keys_type(struct btrfs_key *a, struct btrfs_key *b)
+static int btrfs_comp_keys(struct btrfs_disk_key *disk,
+			     const struct btrfs_key *k2)
 {
-	if (a->objectid > b->objectid)
-		return 1;
-	if (a->objectid < b->objectid)
-		return -1;
-	if (a->type > b->type)
-		return 1;
-	if (a->type < b->type)
-		return -1;
-	return 0;
+	struct btrfs_key k1;
+
+	btrfs_disk_key_to_cpu(&k1, disk);
+	return btrfs_comp_cpu_keys(&k1, k2);
 }
 
-static int generic_bin_search(void *addr, int item_size, struct btrfs_key *key,
-			      int max, int *slot)
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_fs_info *fs_info,
+		 struct btrfs_disk_key *parent_key, struct extent_buffer *buf)
 {
-	int low = 0, high = max, mid, ret;
-	struct btrfs_key *tmp;
+	int i;
+	struct btrfs_key cpukey;
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
+	enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+	if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(fs_info))
+		goto fail;
+
+	ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+	if (parent_key && parent_key->type) {
+		btrfs_node_key(buf, &key, 0);
+		if (memcmp(parent_key, &key, sizeof(key)))
+			goto fail;
+	}
+	ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+		btrfs_node_key(buf, &key, i);
+		btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0)
+			goto fail;
+	}
+	return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+	return ret;
+}
+
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_fs_info *fs_info,
+		 struct btrfs_disk_key *parent_key, struct extent_buffer *buf)
+{
+	int i;
+	struct btrfs_key cpukey;
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
+	enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+	if (nritems * sizeof(struct btrfs_item) > buf->len)  {
+		fprintf(stderr, "invalid number of items %llu\n",
+			(unsigned long long)buf->start);
+		goto fail;
+	}
+
+	if (btrfs_header_level(buf) != 0) {
+		ret = BTRFS_TREE_BLOCK_INVALID_LEVEL;
+		fprintf(stderr, "leaf is not a leaf %llu\n",
+		       (unsigned long long)btrfs_header_bytenr(buf));
+		goto fail;
+	}
+	if (btrfs_leaf_free_space(buf) < 0) {
+		ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE;
+		fprintf(stderr, "leaf free space incorrect %llu %d\n",
+			(unsigned long long)btrfs_header_bytenr(buf),
+			btrfs_leaf_free_space(buf));
+		goto fail;
+	}
 
-	while (low < high) {
+	if (nritems == 0)
+		return BTRFS_TREE_BLOCK_CLEAN;
+
+	btrfs_item_key(buf, &key, 0);
+	if (parent_key && parent_key->type &&
+	    memcmp(parent_key, &key, sizeof(key))) {
+		ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+		fprintf(stderr, "leaf parent key incorrect %llu\n",
+		       (unsigned long long)btrfs_header_bytenr(buf));
+		goto fail;
+	}
+	for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+		btrfs_item_key(buf, &key, i);
+		btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+			ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+			fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+			goto fail;
+		}
+		if (btrfs_item_offset_nr(buf, i) !=
+			btrfs_item_end_nr(buf, i + 1)) {
+			ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+			fprintf(stderr, "incorrect offsets %u %u\n",
+				btrfs_item_offset_nr(buf, i),
+				btrfs_item_end_nr(buf, i + 1));
+			goto fail;
+		}
+		if (i == 0 && btrfs_item_end_nr(buf, i) !=
+		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
+			ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+			fprintf(stderr, "bad item end %u wanted %u\n",
+				btrfs_item_end_nr(buf, i),
+				(unsigned)BTRFS_LEAF_DATA_SIZE(fs_info));
+			goto fail;
+		}
+	}
+
+	for (i = 0; i < nritems; i++) {
+		if (btrfs_item_end_nr(buf, i) >
+				BTRFS_LEAF_DATA_SIZE(fs_info)) {
+			btrfs_item_key(buf, &key, 0);
+			ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+			fprintf(stderr, "slot end outside of leaf %llu > %llu\n",
+				(unsigned long long)btrfs_item_end_nr(buf, i),
+				(unsigned long long)BTRFS_LEAF_DATA_SIZE(
+					fs_info));
+			goto fail;
+		}
+	}
+
+	return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+	return ret;
+}
+
+static int noinline check_block(struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path, int level)
+{
+	struct btrfs_disk_key key;
+	struct btrfs_disk_key *key_ptr = NULL;
+	struct extent_buffer *parent;
+	enum btrfs_tree_block_status ret;
+
+	if (path->nodes[level + 1]) {
+		parent = path->nodes[level + 1];
+		btrfs_node_key(parent, &key, path->slots[level + 1]);
+		key_ptr = &key;
+	}
+	if (level == 0)
+		ret = btrfs_check_leaf(fs_info, key_ptr, path->nodes[0]);
+	else
+		ret = btrfs_check_node(fs_info, key_ptr, path->nodes[level]);
+	if (ret == BTRFS_TREE_BLOCK_CLEAN)
+		return 0;
+	return -EIO;
+}
+
+/*
+ * search for key in the extent_buffer.  The items start at offset p,
+ * and they are item_size apart.  There are 'max' items in p.
+ *
+ * the slot in the array is returned via slot, and it points to
+ * the place where you would insert key if it is not found in
+ * the array.
+ *
+ * slot may point to max if the key is bigger than all of the keys
+ */
+static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
+			      int item_size, const struct btrfs_key *key,
+			      int max, int *slot)
+{
+	int low = 0;
+	int high = max;
+	int mid;
+	int ret;
+	unsigned long offset;
+	struct btrfs_disk_key *tmp;
+
+	while(low < high) {
 		mid = (low + high) / 2;
+		offset = p + mid * item_size;
 
-		tmp = (struct btrfs_key *) ((u8 *) addr + mid*item_size);
+		tmp = (struct btrfs_disk_key *)(eb->data + offset);
 		ret = btrfs_comp_keys(tmp, key);
 
-		if (ret < 0) {
+		if (ret < 0)
 			low = mid + 1;
-		} else if (ret > 0) {
+		else if (ret > 0)
 			high = mid;
-		} else {
+		else {
 			*slot = mid;
 			return 0;
 		}
 	}
-
 	*slot = low;
 	return 1;
 }
 
-int btrfs_bin_search(union btrfs_tree_node *p, struct btrfs_key *key,
+/*
+ * simple bin_search frontend that does the right thing for
+ * leaves vs nodes
+ */
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
 		     int *slot)
 {
-	void *addr;
-	unsigned long size;
-
-	if (p->header.level) {
-		addr = p->node.ptrs;
-		size = sizeof(struct btrfs_key_ptr);
-	} else {
-		addr = p->leaf.items;
-		size = sizeof(struct btrfs_item);
-	}
-
-	return generic_bin_search(addr, size, key, p->header.nritems, slot);
+	if (btrfs_header_level(eb) == 0)
+		return generic_bin_search(eb,
+					  offsetof(struct btrfs_leaf, items),
+					  sizeof(struct btrfs_item),
+					  key, btrfs_header_nritems(eb),
+					  slot);
+	else
+		return generic_bin_search(eb,
+					  offsetof(struct btrfs_node, ptrs),
+					  sizeof(struct btrfs_key_ptr),
+					  key, btrfs_header_nritems(eb),
+					  slot);
 }
 
-static void clear_path(struct btrfs_path *p)
+struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
+				   struct extent_buffer *parent, int slot)
 {
-	int i;
-
-	for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
-		p->nodes[i] = NULL;
-		p->slots[i] = 0;
+	struct extent_buffer *ret;
+	int level = btrfs_header_level(parent);
+
+	if (slot < 0)
+		return NULL;
+	if (slot >= btrfs_header_nritems(parent))
+		return NULL;
+
+	if (level == 0)
+		return NULL;
+
+	ret = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
+		       btrfs_node_ptr_generation(parent, slot));
+	if (!extent_buffer_uptodate(ret))
+		return ERR_PTR(-EIO);
+
+	if (btrfs_header_level(ret) != level - 1) {
+		error("child eb corrupted: parent bytenr=%llu item=%d parent level=%d child level=%d",
+		      btrfs_header_bytenr(parent), slot,
+		      btrfs_header_level(parent), btrfs_header_level(ret));
+		free_extent_buffer(ret);
+		return ERR_PTR(-EIO);
 	}
+	return ret;
 }
 
-void btrfs_free_path(struct btrfs_path *p)
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+		u64 iobjectid, u64 ioff, u8 key_type,
+		struct btrfs_key *found_key)
 {
-	int i;
+	int ret;
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	struct btrfs_path *path;
+
+	key.type = key_type;
+	key.objectid = iobjectid;
+	key.offset = ioff;
+
+	if (found_path == NULL) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	} else
+		path = found_path;
+
+	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+	if ((ret < 0) || (found_key == NULL))
+		goto out;
+
+	eb = path->nodes[0];
+	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+		ret = btrfs_next_leaf(fs_root, path);
+		if (ret)
+			goto out;
+		eb = path->nodes[0];
+	}
 
-	for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
-		if (p->nodes[i])
-			free(p->nodes[i]);
+	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+	if (found_key->type != key.type ||
+			found_key->objectid != key.objectid) {
+		ret = 1;
+		goto out;
 	}
 
-	clear_path(p);
+out:
+	if (path != found_path)
+		btrfs_free_path(path);
+	return ret;
 }
 
-static int read_tree_node(u64 physical, union btrfs_tree_node **buf)
+/*
+ * look for key in the tree.  path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned.  If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ *
+ * NOTE: This version has no COW ability, thus we expect trans == NULL,
+ * ins_len == 0 and cow == 0.
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, const struct btrfs_key *key,
+		struct btrfs_path *p, int ins_len, int cow)
 {
-	ALLOC_CACHE_ALIGN_BUFFER(struct btrfs_header, hdr,
-				 sizeof(struct btrfs_header));
-	unsigned long size, offset = sizeof(*hdr);
-	union btrfs_tree_node *res;
-	u32 i;
-
-	if (!btrfs_devread(physical, sizeof(*hdr), hdr))
-		return -1;
-
-	btrfs_header_to_cpu(hdr);
-
-	if (hdr->level)
-		size = sizeof(struct btrfs_node)
-		       + hdr->nritems * sizeof(struct btrfs_key_ptr);
-	else
-		size = btrfs_info.sb.nodesize;
-
-	res = malloc_cache_aligned(size);
-	if (!res) {
-		debug("%s: malloc failed\n", __func__);
-		return -1;
-	}
-
-	if (!btrfs_devread(physical + offset, size - offset,
-			   ((u8 *) res) + offset)) {
-		free(res);
-		return -1;
+	struct extent_buffer *b;
+	int slot;
+	int ret;
+	int level;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	u8 lowest_level = 0;
+
+	assert(trans == NULL && ins_len == 0 && cow == 0);
+	lowest_level = p->lowest_level;
+	WARN_ON(lowest_level && ins_len > 0);
+	WARN_ON(p->nodes[0] != NULL);
+
+	b = root->node;
+	extent_buffer_get(b);
+	while (b) {
+		level = btrfs_header_level(b);
+		/*
+		if (cow) {
+			int wret;
+			wret = btrfs_cow_block(trans, root, b,
+					       p->nodes[level + 1],
+					       p->slots[level + 1],
+					       &b);
+			if (wret) {
+				free_extent_buffer(b);
+				return wret;
+			}
+		}
+		*/
+		BUG_ON(!cow && ins_len);
+		if (level != btrfs_header_level(b))
+			WARN_ON(1);
+		level = btrfs_header_level(b);
+		p->nodes[level] = b;
+		ret = check_block(fs_info, p, level);
+		if (ret)
+			return -1;
+		ret = btrfs_bin_search(b, key, &slot);
+		if (level != 0) {
+			if (ret && slot > 0)
+				slot -= 1;
+			p->slots[level] = slot;
+			/*
+			if ((p->search_for_split || ins_len > 0) &&
+			    btrfs_header_nritems(b) >=
+			    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
+				int sret = split_node(trans, root, p, level);
+				BUG_ON(sret > 0);
+				if (sret)
+					return sret;
+				b = p->nodes[level];
+				slot = p->slots[level];
+			} else if (ins_len < 0) {
+				int sret = balance_level(trans, root, p,
+							 level);
+				if (sret)
+					return sret;
+				b = p->nodes[level];
+				if (!b) {
+					btrfs_release_path(p);
+					goto again;
+				}
+				slot = p->slots[level];
+				BUG_ON(btrfs_header_nritems(b) == 1);
+			}
+			*/
+			/* this is only true while dropping a snapshot */
+			if (level == lowest_level)
+				break;
+
+			b = read_node_slot(fs_info, b, slot);
+			if (!extent_buffer_uptodate(b))
+				return -EIO;
+		} else {
+			p->slots[level] = slot;
+			/*
+			if (ins_len > 0 &&
+			    ins_len > btrfs_leaf_free_space(b)) {
+				int sret = split_leaf(trans, root, key,
+						      p, ins_len, ret == 0);
+				BUG_ON(sret > 0);
+				if (sret)
+					return sret;
+			}
+			*/
+			return ret;
+		}
 	}
-
-	memcpy(&res->header, hdr, sizeof(*hdr));
-	if (hdr->level)
-		for (i = 0; i < hdr->nritems; ++i)
-			btrfs_key_ptr_to_cpu(&res->node.ptrs[i]);
-	else
-		for (i = 0; i < hdr->nritems; ++i)
-			btrfs_item_to_cpu(&res->leaf.items[i]);
-
-	*buf = res;
-
-	return 0;
+	return 1;
 }
 
-int btrfs_search_tree(const struct btrfs_root *root, struct btrfs_key *key,
-		      struct btrfs_path *p)
+/*
+ * Helper to use instead of search slot if no exact match is needed but
+ * instead the next or previous item should be returned.
+ * When find_higher is true, the next higher item is returned, the next lower
+ * otherwise.
+ * When return_any and find_higher are both true, and no higher item is found,
+ * return the next lower instead.
+ * When return_any is true and find_higher is false, and no lower item is found,
+ * return the next higher instead.
+ * It returns 0 if any item is found, 1 if none is found (tree empty), and
+ * < 0 on error
+ */
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+			       const struct btrfs_key *key,
+			       struct btrfs_path *p, int find_higher,
+			       int return_any)
 {
-	u8 lvl, prev_lvl;
-	int i, slot, ret;
-	u64 logical, physical;
-	union btrfs_tree_node *buf;
-
-	clear_path(p);
-
-	logical = root->bytenr;
-
-	for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
-		physical = btrfs_map_logical_to_physical(logical);
-		if (physical == -1ULL)
-			goto err;
-
-		if (read_tree_node(physical, &buf))
-			goto err;
-
-		lvl = buf->header.level;
-		if (i && prev_lvl != lvl + 1) {
-			printf("%s: invalid level in header at %llu\n",
-			       __func__, logical);
-			goto err;
+	int ret;
+	struct extent_buffer *leaf;
+
+again:
+	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
+	if (ret <= 0)
+		 return ret;
+	/*
+	 * A return value of 1 means the path is at the position where the item
+	 * should be inserted. Normally this is the next bigger item, but in
+	 * case the previous item is the last in a leaf, path points to the
+	 * first free slot in the previous leaf, i.e. at an invalid item.
+	 */
+	leaf = p->nodes[0];
+
+	if (find_higher) {
+		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, p);
+			if (ret <= 0)
+				return ret;
+			if (!return_any)
+				return 1;
+			/*
+			 * No higher item found, return the next lower instead
+			 */
+			return_any = 0;
+			find_higher = 0;
+			btrfs_release_path(p);
+			goto again;
 		}
-		prev_lvl = lvl;
-
-		ret = btrfs_bin_search(buf, key, &slot);
-		if (ret < 0)
-			goto err;
-		if (ret && slot > 0 && lvl)
-			slot -= 1;
-
-		p->slots[lvl] = slot;
-		p->nodes[lvl] = buf;
-
-		if (lvl) {
-			logical = buf->node.ptrs[slot].blockptr;
-		} else {
+	} else {
+		if (p->slots[0] == 0) {
+			ret = btrfs_prev_leaf(root, p);
+			if (ret < 0)
+				return ret;
+			if (!ret) {
+				leaf = p->nodes[0];
+				if (p->slots[0] == btrfs_header_nritems(leaf))
+					p->slots[0]--;
+				return 0;
+			}
+			if (!return_any)
+				return 1;
 			/*
-			 * The path might be invalid if:
-			 *   cur leaf max < searched value < next leaf min
-			 *
-			 * Jump to the next valid element if it exists.
+			 * No lower item found, return the next higher instead
 			 */
-			if (slot >= buf->header.nritems)
-				if (btrfs_next_slot(p) < 0)
-					goto err;
-			break;
+			return_any = 0;
+			find_higher = 1;
+			btrfs_release_path(p);
+			goto again;
+		} else {
+			--p->slots[0];
 		}
 	}
-
 	return 0;
-err:
-	btrfs_free_path(p);
-	return -1;
 }
 
-static int jump_leaf(struct btrfs_path *path, int dir)
+/*
+ * how many bytes are required to store the items in a leaf.  start
+ * and nr indicate which items in the leaf to check.  This totals up the
+ * space used both by the item structs and the item data
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
 {
-	struct btrfs_path p;
-	u32 slot;
-	int level = 1, from_level, i;
-
-	dir = dir >= 0 ? 1 : -1;
+	int data_len;
+	int nritems = btrfs_header_nritems(l);
+	int end = min(nritems, start + nr) - 1;
+
+	if (!nr)
+		return 0;
+	data_len = btrfs_item_end_nr(l, start);
+	data_len = data_len - btrfs_item_offset_nr(l, end);
+	data_len += sizeof(struct btrfs_item) * nr;
+	WARN_ON(data_len < 0);
+	return data_len;
+}
 
-	p = *path;
+/*
+ * The space between the end of the leaf items and
+ * the start of the leaf data.  IOW, how much room
+ * the leaf has left for both items and data
+ */
+int btrfs_leaf_free_space(struct extent_buffer *leaf)
+{
+	int nritems = btrfs_header_nritems(leaf);
+	u32 leaf_data_size;
+	int ret;
+
+	BUG_ON(leaf->fs_info && leaf->fs_info->nodesize != leaf->len);
+	leaf_data_size = __BTRFS_LEAF_DATA_SIZE(leaf->len);
+	ret = leaf_data_size - leaf_space_used(leaf, 0 ,nritems);
+	if (ret < 0) {
+		printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n",
+		       ret, leaf_data_size, leaf_space_used(leaf, 0, nritems),
+		       nritems);
+	}
+	return ret;
+}
 
-	while (level < BTRFS_MAX_LEVEL) {
-		if (!p.nodes[level])
+/*
+ * walk up the tree as far as required to find the previous leaf.
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+	int slot;
+	int level = 1;
+	struct extent_buffer *c;
+	struct extent_buffer *next = NULL;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	while(level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
 			return 1;
 
-		slot = p.slots[level];
-		if ((dir > 0 && slot + dir >= p.nodes[level]->header.nritems)
-		    || (dir < 0 && !slot))
+		slot = path->slots[level];
+		c = path->nodes[level];
+		if (slot == 0) {
 			level++;
-		else
-			break;
-	}
-
-	if (level == BTRFS_MAX_LEVEL)
-		return 1;
-
-	p.slots[level] = slot + dir;
-	level--;
-	from_level = level;
-
-	while (level >= 0) {
-		u64 logical, physical;
-
-		slot = p.slots[level + 1];
-		logical = p.nodes[level + 1]->node.ptrs[slot].blockptr;
-		physical = btrfs_map_logical_to_physical(logical);
-		if (physical == -1ULL)
-			goto err;
-
-		if (read_tree_node(physical, &p.nodes[level]))
-			goto err;
+			if (level == BTRFS_MAX_LEVEL)
+				return 1;
+			continue;
+		}
+		slot--;
 
-		if (dir > 0)
-			p.slots[level] = 0;
-		else
-			p.slots[level] = p.nodes[level]->header.nritems - 1;
+		next = read_node_slot(fs_info, c, slot);
+		if (!extent_buffer_uptodate(next)) {
+			if (IS_ERR(next))
+				return PTR_ERR(next);
+			return -EIO;
+		}
+		break;
+	}
+	path->slots[level] = slot;
+	while(1) {
 		level--;
+		c = path->nodes[level];
+		free_extent_buffer(c);
+		slot = btrfs_header_nritems(next);
+		if (slot != 0)
+			slot--;
+		path->nodes[level] = next;
+		path->slots[level] = slot;
+		if (!level)
+			break;
+		next = read_node_slot(fs_info, next, slot);
+		if (!extent_buffer_uptodate(next)) {
+			if (IS_ERR(next))
+				return PTR_ERR(next);
+			return -EIO;
+		}
 	}
-
-	/* Free rewritten nodes in path */
-	for (i = 0; i <= from_level; ++i)
-		free(path->nodes[i]);
-
-	*path = p;
 	return 0;
-
-err:
-	/* Free rewritten nodes in p */
-	for (i = level + 1; i <= from_level; ++i)
-		free(p.nodes[i]);
-	return -1;
 }
 
-int btrfs_prev_slot(struct btrfs_path *p)
+/*
+ * Walk up the tree as far as necessary to find the next sibling tree block.
+ * More generic version of btrfs_next_leaf(), as it could find sibling nodes
+ * if @path->lowest_level is not 0.
+ *
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+				  struct btrfs_path *path)
 {
-	if (!p->slots[0])
-		return jump_leaf(p, -1);
+	int slot;
+	int level = path->lowest_level + 1;
+	struct extent_buffer *c;
+	struct extent_buffer *next = NULL;
+
+	BUG_ON(path->lowest_level + 1 >= BTRFS_MAX_LEVEL);
+	do {
+		if (!path->nodes[level])
+			return 1;
 
-	p->slots[0]--;
+		slot = path->slots[level] + 1;
+		c = path->nodes[level];
+		if (slot >= btrfs_header_nritems(c)) {
+			level++;
+			if (level == BTRFS_MAX_LEVEL)
+				return 1;
+			continue;
+		}
+
+		next = read_node_slot(fs_info, c, slot);
+		if (!extent_buffer_uptodate(next))
+			return -EIO;
+		break;
+	} while (level < BTRFS_MAX_LEVEL);
+	path->slots[level] = slot;
+	while(1) {
+		level--;
+		c = path->nodes[level];
+		free_extent_buffer(c);
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		if (level == path->lowest_level)
+			break;
+		next = read_node_slot(fs_info, next, 0);
+		if (!extent_buffer_uptodate(next))
+			return -EIO;
+	}
 	return 0;
 }
 
-int btrfs_next_slot(struct btrfs_path *p)
+int btrfs_previous_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid,
+			int type)
 {
-	struct btrfs_leaf *leaf = &p->nodes[0]->leaf;
-
-	if (p->slots[0] + 1 >= leaf->header.nritems)
-		return jump_leaf(p, 1);
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int ret;
+
+	while(1) {
+		if (path->slots[0] == 0) {
+			ret = btrfs_prev_leaf(root, path);
+			if (ret != 0)
+				return ret;
+		} else {
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (nritems == 0)
+			return 1;
+		if (path->slots[0] == nritems)
+			path->slots[0]--;
 
-	p->slots[0]++;
-	return 0;
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid < min_objectid)
+			break;
+		if (found_key.type == type)
+			return 0;
+		if (found_key.objectid == min_objectid &&
+		    found_key.type < type)
+			break;
+	}
+	return 1;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 65c152a52fc..219c410b189 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -11,16 +11,17 @@
 
 #include <common.h>
 #include <compiler.h>
-#include "btrfs_tree.h"
-
-#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+#include <linux/rbtree.h>
+#include <linux/bug.h>
+#include <linux/unaligned/le_byteshift.h>
+#include <u-boot/crc.h>
+#include "kernel-shared/btrfs_tree.h"
+#include "crypto/hash.h"
+#include "compat.h"
+#include "extent-io.h"
 
 #define BTRFS_MAX_MIRRORS 3
 
-#define BTRFS_MAX_LEVEL 8
-
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
 /*
  * the max metadata block size.  This limit is somewhat artificial,
  * but the memmove costs go through the roof for larger blocks.
@@ -28,23 +29,38 @@
 #define BTRFS_MAX_METADATA_BLOCKSIZE 65536
 
 /*
- * we can actually store much bigger names, but lets not confuse the rest
- * of linux
- */
-#define BTRFS_NAME_LEN 255
-
-/*
  * Theoretical limit is larger, but we keep this down to a sane
  * value. That should limit greatly the possibility of collisions on
  * inode ref items.
  */
 #define BTRFS_LINK_MAX 65535U
 
-static const int btrfs_csum_sizes[] = { 4 };
-
 /* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
 
+struct btrfs_mapping_tree {
+	struct cache_tree cache_tree;
+};
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+	BUG_ON(num_stripes == 0);
+	return sizeof(struct btrfs_chunk) +
+		sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
+
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+#define BTRFS_LEAF_DATA_SIZE(fs_info) \
+				(__BTRFS_LEAF_DATA_SIZE(fs_info->nodesize))
+
+struct btrfs_path {
+	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+	int slots[BTRFS_MAX_LEVEL];
+
+	/* keep some upper locks as we walk down */
+	u8 lowest_level;
+};
+
 /* ioprio of readahead is set to idle */
 #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
 
@@ -52,6 +68,92 @@ static const int btrfs_csum_sizes[] = { 4 };
 
 #define BTRFS_MAX_EXTENT_SIZE SZ_128M
 
+enum btrfs_tree_block_status {
+	BTRFS_TREE_BLOCK_CLEAN,
+	BTRFS_TREE_BLOCK_INVALID_NRITEMS,
+	BTRFS_TREE_BLOCK_INVALID_PARENT_KEY,
+	BTRFS_TREE_BLOCK_BAD_KEY_ORDER,
+	BTRFS_TREE_BLOCK_INVALID_LEVEL,
+	BTRFS_TREE_BLOCK_INVALID_FREE_SPACE,
+	BTRFS_TREE_BLOCK_INVALID_OFFSETS,
+};
+
+struct btrfs_root {
+	struct extent_buffer *node;
+	struct btrfs_root_item root_item;
+	struct btrfs_key root_key;
+	struct btrfs_fs_info *fs_info;
+	u64 objectid;
+	u64 last_trans;
+
+	int ref_cows;
+	int track_dirty;
+
+	u32 type;
+	u64 last_inode_alloc;
+
+	struct rb_node rb_node;
+};
+
+struct btrfs_trans_handle;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+	u8 *new_chunk_tree_uuid;
+	struct btrfs_root *fs_root;
+	struct btrfs_root *tree_root;
+	struct btrfs_root *chunk_root;
+	struct btrfs_root *csum_root;
+
+	struct rb_root fs_root_tree;
+
+	struct extent_io_tree extent_cache;
+
+	/* logical->physical extent mapping */
+	struct btrfs_mapping_tree mapping_tree;
+
+	u64 last_trans_committed;
+
+	struct btrfs_super_block *super_copy;
+
+	struct btrfs_fs_devices *fs_devices;
+
+	/* Cached block sizes */
+	u32 nodesize;
+	u32 sectorsize;
+	u32 stripesize;
+};
+
+static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
+{
+	return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
+{
+	return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_EXTENT_BUFFER(const struct extent_buffer *eb)
+{
+	BUG_ON(eb->fs_info && eb->fs_info->nodesize != eb->len);
+	return __BTRFS_LEAF_DATA_SIZE(eb->len) / sizeof(struct btrfs_key_ptr);
+}
+
+#define BTRFS_FILE_EXTENT_INLINE_DATA_START		\
+	(offsetof(struct btrfs_file_extent_item, disk_bytenr))
+static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
+{
+	return BTRFS_MAX_ITEM_SIZE(info) -
+		BTRFS_FILE_EXTENT_INLINE_DATA_START;
+}
+
+static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
+{
+	return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
+}
+
 /*
  * File system states
  */
@@ -61,273 +163,1136 @@ static const int btrfs_csum_sizes[] = { 4 };
 #define BTRFS_FS_STATE_DEV_REPLACING	3
 #define BTRFS_FS_STATE_DUMMY_FS_INFO	4
 
-#define BTRFS_BACKREF_REV_MAX		256
-#define BTRFS_BACKREF_REV_SHIFT		56
-#define BTRFS_BACKREF_REV_MASK		(((u64)BTRFS_BACKREF_REV_MAX - 1) << \
-					 BTRFS_BACKREF_REV_SHIFT)
+#define read_eb_member(eb, ptr, type, member, result) (			\
+	read_extent_buffer(eb, (char *)(result),			\
+			   ((unsigned long)(ptr)) +			\
+			    offsetof(type, member),			\
+			   sizeof(((type *)0)->member)))
 
-#define BTRFS_OLD_BACKREF_REV		0
-#define BTRFS_MIXED_BACKREF_REV		1
+#define write_eb_member(eb, ptr, type, member, result) (		\
+	write_extent_buffer(eb, (char *)(result),			\
+			   ((unsigned long)(ptr)) +			\
+			    offsetof(type, member),			\
+			   sizeof(((type *)0)->member)))
 
-/*
- * every tree block (leaf or node) starts with this header.
- */
-struct btrfs_header {
-	/* these first four must match the super block */
-	__u8 csum[BTRFS_CSUM_SIZE];
-	__u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
-	__u64 bytenr; /* which block this node is supposed to live in */
-	__u64 flags;
-
-	/* allowed to be different from the super from here on down */
-	__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
-	__u64 generation;
-	__u64 owner;
-	__u32 nritems;
-	__u8 level;
-} __attribute__ ((__packed__));
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
+static inline u##bits btrfs_##name(const struct extent_buffer *eb)	\
+{									\
+	const struct btrfs_header *h = (struct btrfs_header *)eb->data;	\
+	return le##bits##_to_cpu(h->member);				\
+}									\
+static inline void btrfs_set_##name(struct extent_buffer *eb,		\
+				    u##bits val)			\
+{									\
+	struct btrfs_header *h = (struct btrfs_header *)eb->data;	\
+	h->member = cpu_to_le##bits(val);				\
+}
 
-/*
- * this is a very generous portion of the super block, giving us
- * room to translate 14 chunks with 3 stripes each.
- */
-#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+#define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
+static inline u##bits btrfs_##name(const struct extent_buffer *eb,	\
+				   const type *s)			\
+{									\
+	unsigned long offset = (unsigned long)s;			\
+	const type *p = (type *) (eb->data + offset);			\
+	return get_unaligned_le##bits(&p->member);			\
+}									\
+static inline void btrfs_set_##name(struct extent_buffer *eb,		\
+				    type *s, u##bits val)		\
+{									\
+	unsigned long offset = (unsigned long)s;			\
+	type *p = (type *) (eb->data + offset);				\
+	put_unaligned_le##bits(val, &p->member);			\
+}
 
-/*
- * just in case we somehow lose the roots and are not able to mount,
- * we store an array of the roots from previous transactions
- * in the super.
- */
-#define BTRFS_NUM_BACKUP_ROOTS 4
-struct btrfs_root_backup {
-	__u64 tree_root;
-	__u64 tree_root_gen;
-
-	__u64 chunk_root;
-	__u64 chunk_root_gen;
-
-	__u64 extent_root;
-	__u64 extent_root_gen;
-
-	__u64 fs_root;
-	__u64 fs_root_gen;
-
-	__u64 dev_root;
-	__u64 dev_root_gen;
-
-	__u64 csum_root;
-	__u64 csum_root_gen;
-
-	__u64 total_bytes;
-	__u64 bytes_used;
-	__u64 num_devices;
-	/* future */
-	__u64 unused_64[4];
-
-	__u8 tree_root_level;
-	__u8 chunk_root_level;
-	__u8 extent_root_level;
-	__u8 fs_root_level;
-	__u8 dev_root_level;
-	__u8 csum_root_level;
-	/* future and to align */
-	__u8 unused_8[10];
-} __attribute__ ((__packed__));
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)		\
+static inline u##bits btrfs_##name(const type *s)			\
+{									\
+	return le##bits##_to_cpu(s->member);				\
+}									\
+static inline void btrfs_set_##name(type *s, u##bits val)		\
+{									\
+	s->member = cpu_to_le##bits(val);				\
+}
 
-/*
- * the super block basically lists the main trees of the FS
- * it currently lacks any block count etc etc
- */
-struct btrfs_super_block {
-	__u8 csum[BTRFS_CSUM_SIZE];
-	/* the first 4 fields must match struct btrfs_header */
-	__u8 fsid[BTRFS_FSID_SIZE];    /* FS specific uuid */
-	__u64 bytenr; /* this block number */
-	__u64 flags;
-
-	/* allowed to be different from the btrfs_header from here own down */
-	__u64 magic;
-	__u64 generation;
-	__u64 root;
-	__u64 chunk_root;
-	__u64 log_root;
-
-	/* this will help find the new super based on the log root */
-	__u64 log_root_transid;
-	__u64 total_bytes;
-	__u64 bytes_used;
-	__u64 root_dir_objectid;
-	__u64 num_devices;
-	__u32 sectorsize;
-	__u32 nodesize;
-	__u32 __unused_leafsize;
-	__u32 stripesize;
-	__u32 sys_chunk_array_size;
-	__u64 chunk_root_generation;
-	__u64 compat_flags;
-	__u64 compat_ro_flags;
-	__u64 incompat_flags;
-	__u16 csum_type;
-	__u8 root_level;
-	__u8 chunk_root_level;
-	__u8 log_root_level;
-	struct btrfs_dev_item dev_item;
-
-	char label[BTRFS_LABEL_SIZE];
-
-	__u64 cache_generation;
-	__u64 uuid_tree_generation;
-
-	/* future expansion */
-	__u64 reserved[30];
-	__u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
-	struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
-} __attribute__ ((__packed__));
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item,
+		   start_offset, 64);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
 
-/*
- * Compat flags that we support.  If any incompat flags are set other than the
- * ones specified below then we will fail to mount
- */
-#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
-#define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
-#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
-
-#define BTRFS_FEATURE_COMPAT_RO_SUPP			\
-	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
-	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
-
-#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
-
-#define BTRFS_FEATURE_INCOMPAT_SUPP			\
-	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
-	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
-	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
-	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
-	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
-	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
-	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
-	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
-	 BTRFS_FEATURE_INCOMPAT_NO_HOLES)
-
-#define BTRFS_FEATURE_INCOMPAT_SAFE_SET			\
-	(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
-#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR		0ULL
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+			 total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+			 bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+			 io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+			 io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+			 sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+			 dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+			 seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+			 bandwidth, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
+			 generation, 64);
 
-/*
- * A leaf is full of items. offset and size tell us where to find
- * the item in the leaf (relative to the start of the data area)
- */
-struct btrfs_item {
-	struct btrfs_key key;
-	__u32 offset;
-	__u32 size;
-} __attribute__ ((__packed__));
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+	return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+}
 
-/*
- * leaves have an item area and a data area:
- * [item0, item1....itemN] [free space] [dataN...data1, data0]
- *
- * The data is separate from the items to get the keys closer together
- * during searches.
- */
-struct btrfs_leaf {
-	struct btrfs_header header;
-	struct btrfs_item items[];
-} __attribute__ ((__packed__));
+static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
+{
+	return (char *)d + offsetof(struct btrfs_dev_item, fsid);
+}
+
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+	return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+			 stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+			 io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+			 io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+			 sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+			 num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+			 sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+						   int nr)
+{
+	unsigned long offset = (unsigned long)c;
+	offset += offsetof(struct btrfs_chunk, stripe);
+	offset += nr * sizeof(struct btrfs_stripe);
+	return (struct btrfs_stripe *)offset;
+}
+
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+	return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
+}
+
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+					 struct btrfs_chunk *c, int nr)
+{
+	return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+					     struct btrfs_chunk *c, int nr,
+					     u64 val)
+{
+	btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+					 struct btrfs_chunk *c, int nr)
+{
+	return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+					     struct btrfs_chunk *c, int nr,
+					     u64 val)
+{
+	btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+/* struct btrfs_block_group_item */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+			 used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+			 used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+			struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+		   struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+		   struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+			struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_free_space_info */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+		   extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
+/* struct btrfs_inode_ref */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_extref */
+BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref,
+		   parent_objectid, 64);
+BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref,
+		   name_len, 16);
+BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64);
+
+/* struct btrfs_inode_item */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation,
+			 struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence,
+			 struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid,
+			 struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size,
+			 struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes,
+			 struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group,
+			 struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink,
+			 struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid,
+			 struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid,
+			 struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode,
+			 struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev,
+			 struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags,
+			 struct btrfs_inode_item, flags, 64);
+
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, atime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, mtime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, ctime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, otime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec,
+			 sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec,
+			 nsec, 32);
+
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+		   chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+		   chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+		   chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent,
+			 length, 64);
+
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+	unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+	return (u8 *)((unsigned long)dev + ptr);
+}
+
+
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64);
+
+BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
+
+BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
+
+static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+					struct btrfs_tree_block_info *item,
+					struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+					    struct btrfs_tree_block_info *item,
+					    struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
+		   root, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
+		   objectid, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
+		   offset, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
+		   count, 32);
+
+BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
+		   count, 32);
+
+BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
+		   type, 8);
+BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
+		   offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type,
+			 struct btrfs_extent_inline_ref, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset,
+			 struct btrfs_extent_inline_ref, offset, 64);
+
+static inline u32 btrfs_extent_inline_ref_size(int type)
+{
+	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    type == BTRFS_SHARED_BLOCK_REF_KEY)
+		return sizeof(struct btrfs_extent_inline_ref);
+	if (type == BTRFS_SHARED_DATA_REF_KEY)
+		return sizeof(struct btrfs_shared_data_ref) +
+		       sizeof(struct btrfs_extent_inline_ref);
+	if (type == BTRFS_EXTENT_DATA_REF_KEY)
+		return sizeof(struct btrfs_extent_data_ref) +
+		       offsetof(struct btrfs_extent_inline_ref, offset);
+	BUG();
+	return 0;
+}
+
+BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
+
+/* struct btrfs_node */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+					   int nr, u64 val)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+						 int nr, u64 val)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+	return offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+}
+
+static inline void btrfs_node_key(struct extent_buffer *eb,
+				  struct btrfs_disk_key *disk_key, int nr)
+{
+	unsigned long ptr;
+	ptr = btrfs_node_key_ptr_offset(nr);
+	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+		       struct btrfs_key_ptr, key, disk_key);
+}
+
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+				      struct btrfs_disk_key *disk_key, int nr)
+{
+	unsigned long ptr;
+	ptr = btrfs_node_key_ptr_offset(nr);
+	write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+		       struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+	return offsetof(struct btrfs_leaf, items) +
+		sizeof(struct btrfs_item) * nr;
+}
+
+static inline struct btrfs_item *btrfs_item_nr(int nr)
+{
+	return (struct btrfs_item *)btrfs_item_nr_offset(nr);
+}
+
+static inline u32 btrfs_item_end(struct extent_buffer *eb,
+				 struct btrfs_item *item)
+{
+	return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+}
+
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+	return btrfs_item_end(eb, btrfs_item_nr(nr));
+}
+
+static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
+{
+	return btrfs_item_offset(eb, btrfs_item_nr(nr));
+}
+
+static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+{
+	return btrfs_item_size(eb, btrfs_item_nr(nr));
+}
+
+static inline void btrfs_item_key(struct extent_buffer *eb,
+			   struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(nr);
+	read_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+			       struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(nr);
+	write_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
 
 /*
- * all non-leaf blocks are nodes, they hold only keys and pointers to
- * other blocks
+ * struct btrfs_root_ref
  */
-struct btrfs_key_ptr {
-	struct btrfs_key key;
-	__u64 blockptr;
-	__u64 generation;
-} __attribute__ ((__packed__));
+BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
 
-struct btrfs_node {
-	struct btrfs_header header;
-	struct btrfs_key_ptr ptrs[];
-} __attribute__ ((__packed__));
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16);
 
-union btrfs_tree_node {
-	struct btrfs_header header;
-	struct btrfs_leaf leaf;
-	struct btrfs_node node;
-};
+/* struct btrfs_dir_item */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
 
-typedef __u8 u8;
-typedef __u16 u16;
-typedef __u32 u32;
-typedef __u64 u64;
+BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64);
 
-struct btrfs_path {
-	union btrfs_tree_node *nodes[BTRFS_MAX_LEVEL];
-	u32 slots[BTRFS_MAX_LEVEL];
-};
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+				      struct btrfs_dir_item *item,
+				      struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
 
-struct btrfs_root {
-	u64 objectid;
-	u64 bytenr;
-	u64 root_dirid;
-};
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+					  struct btrfs_dir_item *item,
+					  struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+/* struct btrfs_free_space_header */
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+		   num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+		   num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+		   generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+					struct btrfs_free_space_header *h,
+					struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+					    struct btrfs_free_space_header *h,
+					    struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+/* struct btrfs_disk_key */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+			 objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+					 struct btrfs_disk_key *disk)
+{
+	cpu->offset = le64_to_cpu(disk->offset);
+	cpu->type = disk->type;
+	cpu->objectid = le64_to_cpu(disk->objectid);
+}
+
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+					 const struct btrfs_key *cpu)
+{
+	disk->offset = cpu_to_le64(cpu->offset);
+	disk->type = cpu->type;
+	disk->objectid = cpu_to_le64(cpu->objectid);
+}
+
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+				  struct btrfs_key *key, int nr)
+{
+	struct btrfs_disk_key disk_key;
+	btrfs_node_key(eb, &disk_key, nr);
+	btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+				  struct btrfs_key *key, int nr)
+{
+	struct btrfs_disk_key disk_key;
+	btrfs_item_key(eb, &disk_key, nr);
+	btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+				      struct btrfs_dir_item *item,
+				      struct btrfs_key *key)
+{
+	struct btrfs_disk_key disk_key;
+	btrfs_dir_item_key(eb, item, &disk_key);
+	btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+/* struct btrfs_header */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+			  generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems,
+			 32);
+BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
+			 generation, 64);
+
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	return (btrfs_header_flags(eb) & flag) == flag;
+}
+
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 flags = btrfs_header_flags(eb);
+	btrfs_set_header_flags(eb, flags | flag);
+	return (flags & flag) == flag;
+}
+
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 flags = btrfs_header_flags(eb);
+	btrfs_set_header_flags(eb, flags & ~flag);
+	return (flags & flag) == flag;
+}
+
+static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+{
+	u64 flags = btrfs_header_flags(eb);
+	return flags >> BTRFS_BACKREF_REV_SHIFT;
+}
+
+static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
+						int rev)
+{
+	u64 flags = btrfs_header_flags(eb);
+	flags &= ~BTRFS_BACKREF_REV_MASK;
+	flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
+	btrfs_set_header_flags(eb, flags);
+}
+
+static inline unsigned long btrfs_header_fsid(void)
+{
+	return offsetof(struct btrfs_header, fsid);
+}
+
+static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+	return offsetof(struct btrfs_header, chunk_tree_uuid);
+}
 
-int btrfs_comp_keys(struct btrfs_key *, struct btrfs_key *);
-int btrfs_comp_keys_type(struct btrfs_key *, struct btrfs_key *);
-int btrfs_bin_search(union btrfs_tree_node *, struct btrfs_key *, int *);
-void btrfs_free_path(struct btrfs_path *);
-int btrfs_search_tree(const struct btrfs_root *, struct btrfs_key *,
-		      struct btrfs_path *);
-int btrfs_prev_slot(struct btrfs_path *);
-int btrfs_next_slot(struct btrfs_path *);
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+	unsigned long ptr = offsetof(struct btrfs_header, csum);
+	return (u8 *)ptr;
+}
+
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+	return (btrfs_header_level(eb) == 0);
+}
+
+/* struct btrfs_root_item */
+BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
+			 generation, 64);
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
+			 last_snapshot, 64);
+BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
+			 generation_v2, 64);
+BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
+			 ctransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
+			 otransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
+			 stransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
+			 rtransid, 64);
+
+static inline struct btrfs_timespec* btrfs_root_ctime(
+		struct btrfs_root_item *root_item)
+{
+	unsigned long ptr = (unsigned long)root_item;
+	ptr += offsetof(struct btrfs_root_item, ctime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec* btrfs_root_otime(
+		struct btrfs_root_item *root_item)
+{
+	unsigned long ptr = (unsigned long)root_item;
+	ptr += offsetof(struct btrfs_root_item, otime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec* btrfs_root_stime(
+		struct btrfs_root_item *root_item)
+{
+	unsigned long ptr = (unsigned long)root_item;
+	ptr += offsetof(struct btrfs_root_item, stime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec* btrfs_root_rtime(
+		struct btrfs_root_item *root_item)
+{
+	unsigned long ptr = (unsigned long)root_item;
+	ptr += offsetof(struct btrfs_root_item, rtime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+/* struct btrfs_root_backup */
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+		   tree_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
+		   tree_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
+		   tree_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
+		   chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
+		   chunk_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
+		   chunk_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
+		   extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
+		   extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
+		   extent_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
+		   fs_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
+		   fs_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
+		   fs_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
+		   dev_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
+		   dev_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
+		   dev_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
+		   csum_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
+		   csum_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
+		   csum_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
+		   total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+		   bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+		   num_devices, 64);
+
+/* struct btrfs_super_block */
+
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+			 generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+			 struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation,
+			 struct btrfs_super_block, chunk_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+			 root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+			 chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+			 chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+			 log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
+			 log_root_transid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+			 log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+			 total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+			 bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+			 sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+			 nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+			 stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+			 root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+			 num_devices, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
+			 compat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
+			 compat_ro_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
+			 incompat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
+			 csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+			 cache_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+			 uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+	return offsetof(struct btrfs_leaf, items);
+}
+
+/* struct btrfs_file_extent_item */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+static inline unsigned long btrfs_file_extent_inline_start(struct
+						   btrfs_file_extent_item *e)
+{
+	unsigned long offset = (unsigned long)e;
+	offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	return offset;
+}
+
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+	return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
+}
+
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+		   disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item,
+		   disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+		   generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+		   disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+		  offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item,
+		  offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+		   num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item,
+		   num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
+		   ram_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item,
+		   ram_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
+		   compression, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item,
+		   compression, 8);
+BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
+		   encryption, 8);
+BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
+		   other_encoding, 16);
+
+/* btrfs_qgroup_status_item */
+BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
+		   version, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
+		   flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+		   rescan, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version,
+			 struct btrfs_qgroup_status_item, version, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation,
+			 struct btrfs_qgroup_status_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags,
+			 struct btrfs_qgroup_status_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_rescan,
+			 struct btrfs_qgroup_status_item, rescan, 64);
+
+/* btrfs_qgroup_info_item */
+BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item,
+		   rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed,
+		   struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item,
+		   excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed,
+		   struct btrfs_qgroup_info_item, excl_cmpr, 64);
 
-static inline struct btrfs_key *btrfs_path_leaf_key(struct btrfs_path *p) {
-	return &p->nodes[0]->leaf.items[p->slots[0]].key;
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
+			 struct btrfs_qgroup_info_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced,
+			 struct btrfs_qgroup_info_item, rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed,
+		   struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive,
+			 struct btrfs_qgroup_info_item, excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed,
+		   struct btrfs_qgroup_info_item, excl_cmpr, 64);
+
+/* btrfs_qgroup_limit_item */
+BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
+		   flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item,
+		   max_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item,
+		   max_excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item,
+		   rsv_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item,
+		   rsv_excl, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags,
+			 struct btrfs_qgroup_limit_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_referenced,
+			 struct btrfs_qgroup_limit_item, max_rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_exclusive,
+			 struct btrfs_qgroup_limit_item, max_excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_referenced,
+			 struct btrfs_qgroup_limit_item, rsv_rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_exclusive,
+			 struct btrfs_qgroup_limit_item, rsv_excl, 64);
+
+/* btrfs_balance_item */
+BTRFS_SETGET_FUNCS(balance_item_flags, struct btrfs_balance_item, flags, 64);
+
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_data(
+		struct extent_buffer *eb, struct btrfs_balance_item *bi)
+{
+	unsigned long offset = (unsigned long)bi;
+	struct btrfs_balance_item *p;
+	p = (struct btrfs_balance_item *)(eb->data + offset);
+	return &p->data;
 }
 
-static inline struct btrfs_key *
-btrfs_search_tree_key_type(const struct btrfs_root *root, u64 objectid,
-			   u8 type, struct btrfs_path *path)
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_meta(
+		struct extent_buffer *eb, struct btrfs_balance_item *bi)
 {
-	struct btrfs_key key, *res;
+	unsigned long offset = (unsigned long)bi;
+	struct btrfs_balance_item *p;
+	p = (struct btrfs_balance_item *)(eb->data + offset);
+	return &p->meta;
+}
 
-	key.objectid = objectid;
-	key.type = type;
-	key.offset = 0;
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_sys(
+		struct extent_buffer *eb, struct btrfs_balance_item *bi)
+{
+	unsigned long offset = (unsigned long)bi;
+	struct btrfs_balance_item *p;
+	p = (struct btrfs_balance_item *)(eb->data + offset);
+	return &p->sys;
+}
 
-	if (btrfs_search_tree(root, &key, path))
-		return NULL;
+static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+					const struct btrfs_dev_stats_item *ptr,
+					int index)
+{
+	u64 val;
 
-	res = btrfs_path_leaf_key(path);
-	if (btrfs_comp_keys_type(&key, res)) {
-		btrfs_free_path(path);
-		return NULL;
-	}
+	read_extent_buffer(eb, &val,
+			   offsetof(struct btrfs_dev_stats_item, values) +
+			    ((unsigned long)ptr) + (index * sizeof(u64)),
+			   sizeof(val));
+	return val;
+}
 
-	return res;
+/*
+ * this returns the number of bytes used by the item on disk, minus the
+ * size of any extent headers.  If a file is compressed on disk, this is
+ * the compressed size
+ */
+static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
+						    struct btrfs_item *e)
+{
+	unsigned long offset;
+	offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	return btrfs_item_size(eb, e) - offset;
 }
 
-static inline u32 btrfs_path_item_size(struct btrfs_path *p)
+#define btrfs_fs_incompat(fs_info, opt) \
+	__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
 {
-	return p->nodes[0]->leaf.items[p->slots[0]].size;
+	struct btrfs_super_block *disk_super;
+	disk_super = fs_info->super_copy;
+	return !!(btrfs_super_incompat_flags(disk_super) & flag);
 }
 
-static inline void *btrfs_leaf_data(struct btrfs_leaf *leaf, u32 slot)
+#define btrfs_fs_compat_ro(fs_info, opt) \
+	__btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
 {
-	return ((u8 *) leaf) + sizeof(struct btrfs_header)
-	       + leaf->items[slot].offset;
+	struct btrfs_super_block *disk_super;
+	disk_super = fs_info->super_copy;
+	return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
 }
 
-static inline void *btrfs_path_leaf_data(struct btrfs_path *p)
+/* helper function to cast into the data area of the leaf. */
+#define btrfs_item_ptr(leaf, slot, type) \
+	((type *)(btrfs_leaf_data(leaf) + \
+	btrfs_item_offset_nr(leaf, slot)))
+
+#define btrfs_item_ptr_offset(leaf, slot) \
+	((unsigned long)(btrfs_leaf_data(leaf) + \
+	btrfs_item_offset_nr(leaf, slot)))
+
+static inline u64 btrfs_name_hash(const char *name, int len)
 {
-	return btrfs_leaf_data(&p->nodes[0]->leaf, p->slots[0]);
+	return (u64)crc32c((u32)~1, (u8 *)name, len);
 }
 
-#define btrfs_item_ptr(l,s,t)			\
-	((t *) btrfs_leaf_data((l),(s)))
+/*
+ * Figure the key offset of an extended inode ref
+ */
+static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
+				    int len)
+{
+	return crc32(parent_objectid, (u8 *)name, len);
+}
+
+union btrfs_tree_node {
+	struct btrfs_header header;
+	struct btrfs_leaf leaf;
+	struct btrfs_node node;
+};
 
 #define btrfs_path_item_ptr(p,t)		\
 	((t *) btrfs_path_leaf_data((p)))
 
+u16 btrfs_super_csum_size(const struct btrfs_super_block *s);
+const char *btrfs_super_csum_name(u16 csum_type);
+u16 btrfs_csum_type_size(u16 csum_type);
+size_t btrfs_super_num_csums(void);
+
+/* root-tree.c */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+			struct btrfs_root_item *item, struct btrfs_key *key);
+
+/* dir-item.c */
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     struct btrfs_path *path, u64 dir,
+					     const char *name, int name_len,
+					     int mod);
+typedef int (*btrfs_iter_dir_callback_t)(struct btrfs_root *root,
+					 struct extent_buffer *eb,
+					 struct btrfs_dir_item *di);
+int btrfs_iter_dir(struct btrfs_root *root, u64 ino,
+		   btrfs_iter_dir_callback_t callback);
+/* inode.c */
+int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
+			struct btrfs_root **root_ret, u64 *ino_ret,
+			u8 *type_ret, int symlink_limit);
+int btrfs_read_extent_inline(struct btrfs_path *path,
+			     struct btrfs_file_extent_item *fi, char *dest);
+int btrfs_read_extent_reg(struct btrfs_path *path,
+			  struct btrfs_file_extent_item *fi, u64 offset,
+			  int len, char *dest);
+
+/* ctree.c */
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_fs_info *fs_info,
+		 struct btrfs_disk_key *parent_key, struct extent_buffer *buf);
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_fs_info *fs_info,
+		 struct btrfs_disk_key *parent_key, struct extent_buffer *buf);
+struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
+				   struct extent_buffer *parent, int slot);
+int btrfs_previous_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid,
+			int type);
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+				  struct btrfs_path *path);
+/*
+ * Walk up the tree as far as necessary to find the next leaf.
+ *
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root,
+				  struct btrfs_path *path)
+{
+	path->lowest_level = 0;
+	return btrfs_next_sibling_tree_block(root->fs_info, path);
+}
+
+static inline int btrfs_next_item(struct btrfs_root *root,
+				  struct btrfs_path *p)
+{
+	++p->slots[0];
+	if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
+		return btrfs_next_leaf(root, p);
+	return 0;
+}
+
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct extent_buffer *leaf);
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+		struct btrfs_root *root, const struct btrfs_key *key,
+		struct btrfs_path *p, int ins_len, int cow);
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+			       const struct btrfs_key *key,
+			       struct btrfs_path *p, int find_higher,
+			       int return_any);
+void btrfs_release_path(struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+static inline void btrfs_init_path(struct btrfs_path *p)
+{
+	memset(p, 0, sizeof(*p));
+}
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
+		     int *slot);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+		u64 iobjectid, u64 ioff, u8 key_type,
+		struct btrfs_key *found_key);
 #endif /* __BTRFS_CTREE_H__ */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 63b5bf0a860..aab197a6d55 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -6,119 +6,163 @@
  */
 
 #include "btrfs.h"
+#include "disk-io.h"
 
-static int verify_dir_item(struct btrfs_dir_item *item, u32 start, u32 total)
+static int verify_dir_item(struct btrfs_root *root,
+		    struct extent_buffer *leaf,
+		    struct btrfs_dir_item *dir_item)
 {
-	u16 max_len = BTRFS_NAME_LEN;
-	u32 end;
+	u16 namelen = BTRFS_NAME_LEN;
+	u8 type = btrfs_dir_type(leaf, dir_item);
 
-	if (item->type >= BTRFS_FT_MAX) {
-		printf("%s: invalid dir item type: %i\n", __func__, item->type);
+	if (type == BTRFS_FT_XATTR)
+		namelen = XATTR_NAME_MAX;
+
+	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+		fprintf(stderr, "invalid dir item name len: %u\n",
+			(unsigned)btrfs_dir_data_len(leaf, dir_item));
 		return 1;
 	}
 
-	if (item->type == BTRFS_FT_XATTR)
-		max_len = 255; /* XATTR_NAME_MAX */
-
-	end = start + sizeof(*item) + item->name_len;
-	if (item->name_len > max_len || end > total) {
-		printf("%s: invalid dir item name len: %u\n", __func__,
-		       item->name_len);
+	/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+	if ((btrfs_dir_data_len(leaf, dir_item) +
+	     btrfs_dir_name_len(leaf, dir_item)) >
+			BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
+		fprintf(stderr, "invalid dir item name + data len: %u + %u\n",
+			(unsigned)btrfs_dir_name_len(leaf, dir_item),
+			(unsigned)btrfs_dir_data_len(leaf, dir_item));
 		return 1;
 	}
 
 	return 0;
 }
 
-static struct btrfs_dir_item *
-btrfs_match_dir_item_name(struct btrfs_path *path, const char *name,
-			  int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      const char *name, int name_len)
 {
-	struct btrfs_dir_item *item;
-	u32 total_len, cur = 0, this_len;
-	const char *name_ptr;
-
-	item = btrfs_path_item_ptr(path, struct btrfs_dir_item);
-
-	total_len = btrfs_path_item_size(path);
+	struct btrfs_dir_item *dir_item;
+	unsigned long name_ptr;
+	u32 total_len;
+	u32 cur = 0;
+	u32 this_len;
+	struct extent_buffer *leaf;
+
+	leaf = path->nodes[0];
+	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+	if (verify_dir_item(root, leaf, dir_item))
+		return NULL;
+
+	while(cur < total_len) {
+		this_len = sizeof(*dir_item) +
+			btrfs_dir_name_len(leaf, dir_item) +
+			btrfs_dir_data_len(leaf, dir_item);
+		if (this_len > (total_len - cur)) {
+			fprintf(stderr, "invalid dir item size\n");
+			return NULL;
+		}
 
-	while (cur < total_len) {
-		btrfs_dir_item_to_cpu(item);
-		this_len = sizeof(*item) + item->name_len + item->data_len;
-		name_ptr = (const char *) (item + 1);
+		name_ptr = (unsigned long)(dir_item + 1);
 
-		if (verify_dir_item(item, cur, total_len))
-			return NULL;
-		if (item->name_len == name_len && !memcmp(name_ptr, name,
-							  name_len))
-			return item;
+		if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+		    memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+			return dir_item;
 
 		cur += this_len;
-		item = (struct btrfs_dir_item *) ((u8 *) item + this_len);
+		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+						     this_len);
 	}
-
 	return NULL;
 }
 
-int btrfs_lookup_dir_item(const struct btrfs_root *root, u64 dir,
-			  const char *name, int name_len,
-			  struct btrfs_dir_item *item)
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     struct btrfs_path *path, u64 dir,
+					     const char *name, int name_len,
+					     int mod)
 {
-	struct btrfs_path path;
+	int ret;
 	struct btrfs_key key;
-	struct btrfs_dir_item *res = NULL;
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
 
 	key.objectid = dir;
 	key.type = BTRFS_DIR_ITEM_KEY;
+
 	key.offset = btrfs_name_hash(name, name_len);
 
-	if (btrfs_search_tree(root, &key, &path))
-		return -1;
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0) {
+		if (path->slots[0] == 0)
+			return NULL;
+		path->slots[0]--;
+	}
 
-	if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
-		goto out;
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
-	res = btrfs_match_dir_item_name(&path, name, name_len);
-	if (res)
-		*item = *res;
-out:
-	btrfs_free_path(&path);
-	return res ? 0 : -1;
+	if (found_key.objectid != dir ||
+	    found_key.type != BTRFS_DIR_ITEM_KEY ||
+	    found_key.offset != key.offset)
+		return NULL;
+
+	return btrfs_match_dir_item_name(root, path, name, name_len);
 }
 
-int btrfs_readdir(const struct btrfs_root *root, u64 dir,
-		  btrfs_readdir_callback_t callback)
+int btrfs_iter_dir(struct btrfs_root *root, u64 ino,
+		   btrfs_iter_dir_callback_t callback)
 {
 	struct btrfs_path path;
-	struct btrfs_key key, *found_key;
-	struct btrfs_dir_item *item;
-	int res = 0;
+	struct btrfs_key key;
+	int ret;
 
-	key.objectid = dir;
+	btrfs_init_path(&path);
+	key.objectid = ino;
 	key.type = BTRFS_DIR_INDEX_KEY;
 	key.offset = 0;
 
-	if (btrfs_search_tree(root, &key, &path))
-		return -1;
-
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+	/* Should not happen */
+	if (ret == 0) {
+		ret = -EUCLEAN;
+		goto out;
+	}
+	if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
+		ret = btrfs_next_leaf(root, &path);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			ret = 0;
+			goto out;
+		}
+	}
 	do {
-		found_key = btrfs_path_leaf_key(&path);
-		if (btrfs_comp_keys_type(&key, found_key))
-			break;
-
-		item = btrfs_path_item_ptr(&path, struct btrfs_dir_item);
-		btrfs_dir_item_to_cpu(item);
-
-		if (verify_dir_item(item, 0, sizeof(*item) + item->name_len))
-			continue;
-		if (item->type == BTRFS_FT_XATTR)
-			continue;
+		struct btrfs_dir_item *di;
 
-		if (callback(root, item))
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.objectid != ino || key.type != BTRFS_DIR_INDEX_KEY)
 			break;
-	} while (!(res = btrfs_next_slot(&path)));
-
-	btrfs_free_path(&path);
-
-	return res < 0 ? -1 : 0;
+		di = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				    struct btrfs_dir_item);
+		if (verify_dir_item(root, path.nodes[0], di)) {
+			ret = -EUCLEAN;
+			goto out;
+		}
+		ret = callback(root, path.nodes[0], di);
+		if (ret < 0)
+			goto out;
+	} while (!(ret = btrfs_next_item(root, &path)));
+
+	if (ret > 0)
+		ret = 0;
+out:
+	btrfs_release_path(&path);
+	return ret;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
new file mode 100644
index 00000000000..01e7cee5201
--- /dev/null
+++ b/fs/btrfs/disk-io.c
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <common.h>
+#include <fs_internal.h>
+#include <uuid.h>
+#include <memalign.h>
+#include "kernel-shared/btrfs_tree.h"
+#include "common/rbtree-utils.h"
+#include "disk-io.h"
+#include "ctree.h"
+#include "btrfs.h"
+#include "volumes.h"
+#include "extent-io.h"
+#include "crypto/hash.h"
+
+/* specified errno for check_tree_block */
+#define BTRFS_BAD_BYTENR		(-1)
+#define BTRFS_BAD_FSID			(-2)
+#define BTRFS_BAD_LEVEL			(-3)
+#define BTRFS_BAD_NRITEMS		(-4)
+
+/* Calculate max possible nritems for a leaf/node */
+static u32 max_nritems(u8 level, u32 nodesize)
+{
+
+	if (level == 0)
+		return ((nodesize - sizeof(struct btrfs_header)) /
+			sizeof(struct btrfs_item));
+	return ((nodesize - sizeof(struct btrfs_header)) /
+		sizeof(struct btrfs_key_ptr));
+}
+
+static int check_tree_block(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *buf)
+{
+
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	u32 nodesize = fs_info->nodesize;
+	bool fsid_match = false;
+	int ret = BTRFS_BAD_FSID;
+
+	if (buf->start != btrfs_header_bytenr(buf))
+		return BTRFS_BAD_BYTENR;
+	if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
+		return BTRFS_BAD_LEVEL;
+	if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
+						    nodesize))
+		return BTRFS_BAD_NRITEMS;
+
+	/* Only leaf can be empty */
+	if (btrfs_header_nritems(buf) == 0 &&
+	    btrfs_header_level(buf) != 0)
+		return BTRFS_BAD_NRITEMS;
+
+	while (fs_devices) {
+		/*
+		 * Checking the incompat flag is only valid for the current
+		 * fs. For seed devices it's forbidden to have their uuid
+		 * changed so reading ->fsid in this case is fine
+		 */
+		if (fs_devices == fs_info->fs_devices &&
+		    btrfs_fs_incompat(fs_info, METADATA_UUID))
+			fsid_match = !memcmp_extent_buffer(buf,
+						   fs_devices->metadata_uuid,
+						   btrfs_header_fsid(),
+						   BTRFS_FSID_SIZE);
+		else
+			fsid_match = !memcmp_extent_buffer(buf,
+						    fs_devices->fsid,
+						    btrfs_header_fsid(),
+						    BTRFS_FSID_SIZE);
+
+
+		if (fsid_match) {
+			ret = 0;
+			break;
+		}
+		fs_devices = fs_devices->seed;
+	}
+	return ret;
+}
+
+static void print_tree_block_error(struct btrfs_fs_info *fs_info,
+				struct extent_buffer *eb,
+				int err)
+{
+	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+	char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+	u8 buf[BTRFS_UUID_SIZE];
+
+	if (!err)
+		return;
+
+	fprintf(stderr, "bad tree block %llu, ", eb->start);
+	switch (err) {
+	case BTRFS_BAD_FSID:
+		read_extent_buffer(eb, buf, btrfs_header_fsid(),
+				   BTRFS_UUID_SIZE);
+		uuid_unparse(buf, found_uuid);
+		uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
+		fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
+			fs_uuid, found_uuid);
+		break;
+	case BTRFS_BAD_BYTENR:
+		fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
+			eb->start, btrfs_header_bytenr(eb));
+		break;
+	case BTRFS_BAD_LEVEL:
+		fprintf(stderr, "bad level, %u > %d\n",
+			btrfs_header_level(eb), BTRFS_MAX_LEVEL);
+		break;
+	case BTRFS_BAD_NRITEMS:
+		fprintf(stderr, "invalid nr_items: %u\n",
+			btrfs_header_nritems(eb));
+		break;
+	}
+}
+
+int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
+{
+	memset(out, 0, BTRFS_CSUM_SIZE);
+
+	switch (csum_type) {
+	case BTRFS_CSUM_TYPE_CRC32:
+		return hash_crc32c(data, len, out);
+	case BTRFS_CSUM_TYPE_XXHASH:
+		return hash_xxhash(data, len, out);
+	case BTRFS_CSUM_TYPE_SHA256:
+		return hash_sha256(data, len, out);
+	default:
+		printf("Unknown csum type %d\n", csum_type);
+		return -EINVAL;
+	}
+}
+
+/*
+ * Check if the super is valid:
+ * - nodesize/sectorsize - minimum, maximum, alignment
+ * - tree block starts   - alignment
+ * - number of devices   - something sane
+ * - sys array size      - maximum
+ */
+static int btrfs_check_super(struct btrfs_super_block *sb)
+{
+	u8 result[BTRFS_CSUM_SIZE];
+	u16 csum_type;
+	int csum_size;
+	u8 *metadata_uuid;
+
+	if (btrfs_super_magic(sb) != BTRFS_MAGIC)
+		return -EIO;
+
+	csum_type = btrfs_super_csum_type(sb);
+	if (csum_type >= btrfs_super_num_csums()) {
+		error("unsupported checksum algorithm %u", csum_type);
+		return -EIO;
+	}
+	csum_size = btrfs_super_csum_size(sb);
+
+	btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
+			result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+
+	if (memcmp(result, sb->csum, csum_size)) {
+		error("superblock checksum mismatch");
+		return -EIO;
+	}
+	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		error("tree_root level too big: %d >= %d",
+			btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+		goto error_out;
+	}
+	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		error("chunk_root level too big: %d >= %d",
+			btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+		goto error_out;
+	}
+	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		error("log_root level too big: %d >= %d",
+			btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+		goto error_out;
+	}
+
+	if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
+		error("tree_root block unaligned: %llu", btrfs_super_root(sb));
+		goto error_out;
+	}
+	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
+		error("chunk_root block unaligned: %llu",
+			btrfs_super_chunk_root(sb));
+		goto error_out;
+	}
+	if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
+		error("log_root block unaligned: %llu",
+			btrfs_super_log_root(sb));
+		goto error_out;
+	}
+	if (btrfs_super_nodesize(sb) < 4096) {
+		error("nodesize too small: %u < 4096",
+			btrfs_super_nodesize(sb));
+		goto error_out;
+	}
+	if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
+		error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
+		goto error_out;
+	}
+	if (btrfs_super_sectorsize(sb) < 4096) {
+		error("sectorsize too small: %u < 4096",
+			btrfs_super_sectorsize(sb));
+		goto error_out;
+	}
+	if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
+		error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
+		goto error_out;
+	}
+	if (btrfs_super_total_bytes(sb) == 0) {
+		error("invalid total_bytes 0");
+		goto error_out;
+	}
+	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+		error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
+		goto error_out;
+	}
+	if ((btrfs_super_stripesize(sb) != 4096)
+		&& (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
+		error("invalid stripesize %u", btrfs_super_stripesize(sb));
+		goto error_out;
+	}
+
+	if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
+		metadata_uuid = sb->metadata_uuid;
+	else
+		metadata_uuid = sb->fsid;
+
+	if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+		char fsid[BTRFS_UUID_UNPARSED_SIZE];
+		char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
+
+		uuid_unparse(sb->metadata_uuid, fsid);
+		uuid_unparse(sb->dev_item.fsid, dev_fsid);
+		error("dev_item UUID does not match fsid: %s != %s",
+			dev_fsid, fsid);
+		goto error_out;
+	}
+
+	/*
+	 * Hint to catch really bogus numbers, bitflips or so
+	 */
+	if (btrfs_super_num_devices(sb) > (1UL << 31)) {
+		error("suspicious number of devices: %llu",
+			btrfs_super_num_devices(sb));
+	}
+
+	if (btrfs_super_num_devices(sb) == 0) {
+		error("number of devices is 0");
+		goto error_out;
+	}
+
+	/*
+	 * Obvious sys_chunk_array corruptions, it must hold at least one key
+	 * and one chunk
+	 */
+	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		error("system chunk array too big %u > %u",
+		      btrfs_super_sys_array_size(sb),
+		      BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		goto error_out;
+	}
+	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+			+ sizeof(struct btrfs_chunk)) {
+		error("system chunk array too small %u < %zu",
+		      btrfs_super_sys_array_size(sb),
+		      sizeof(struct btrfs_disk_key) +
+		      sizeof(struct btrfs_chunk));
+		goto error_out;
+	}
+
+	return 0;
+
+error_out:
+	error("superblock checksum matches but it has invalid members");
+	return -EIO;
+}
+
+/*
+ * btrfs_read_dev_super - read a valid primary superblock from a block device
+ * @desc,@part:	file descriptor of the device
+ * @sb:		buffer where the superblock is going to be read in
+ *
+ * Unlike the btrfs-progs/kernel version, here we ony care about the first
+ * super block, thus it's much simpler.
+ */
+int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
+			 struct btrfs_super_block *sb)
+{
+	char tmp[BTRFS_SUPER_INFO_SIZE];
+	struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
+	int ret;
+
+	ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
+			      BTRFS_SUPER_INFO_OFFSET);
+	if (ret < BTRFS_SUPER_INFO_SIZE)
+		return -EIO;
+
+	if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
+		return -EIO;
+
+	if (btrfs_check_super(buf))
+		return -EIO;
+
+	memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
+	return 0;
+}
+
+static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
+				  int verify, int silent, u16 csum_type)
+{
+	u8 result[BTRFS_CSUM_SIZE];
+	u32 len;
+
+	len = buf->len - BTRFS_CSUM_SIZE;
+	btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
+			result, len);
+
+	if (verify) {
+		if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
+			/* FIXME: format */
+			if (!silent)
+				printk("checksum verify failed on %llu found %08X wanted %08X\n",
+				       (unsigned long long)buf->start,
+				       result[0],
+				       buf->data[0]);
+			return 1;
+		}
+	} else {
+		write_extent_buffer(buf, result, 0, csum_size);
+	}
+	return 0;
+}
+
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
+			 u16 csum_type)
+{
+	return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
+}
+
+static int csum_tree_block(struct btrfs_fs_info *fs_info,
+			   struct extent_buffer *buf, int verify)
+{
+	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
+
+	return csum_tree_block_size(buf, csum_size, verify, csum_type);
+}
+
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+					    u64 bytenr, u32 blocksize)
+{
+	return find_extent_buffer(&fs_info->extent_cache,
+				  bytenr, blocksize);
+}
+
+struct extent_buffer* btrfs_find_create_tree_block(
+		struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+	return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
+}
+
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+				 struct extent_buffer *eb, u64 parent_transid,
+				 int ignore)
+{
+	int ret;
+
+	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+		return 0;
+
+	if (extent_buffer_uptodate(eb) &&
+	    btrfs_header_generation(eb) == parent_transid) {
+		ret = 0;
+		goto out;
+	}
+	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+	       (unsigned long long)eb->start,
+	       (unsigned long long)parent_transid,
+	       (unsigned long long)btrfs_header_generation(eb));
+	if (ignore) {
+		eb->flags |= EXTENT_BAD_TRANSID;
+		printk("Ignoring transid failure\n");
+		return 0;
+	}
+
+	ret = 1;
+out:
+	clear_extent_buffer_uptodate(eb);
+	return ret;
+
+}
+
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
+{
+	unsigned long offset = 0;
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	int ret = 0;
+	u64 read_len;
+	unsigned long bytes_left = eb->len;
+
+	while (bytes_left) {
+		read_len = bytes_left;
+		device = NULL;
+
+		ret = btrfs_map_block(info, READ, eb->start + offset,
+				      &read_len, &multi, mirror, NULL);
+		if (ret) {
+			printk("Couldn't map the block %Lu\n", eb->start + offset);
+			kfree(multi);
+			return -EIO;
+		}
+		device = multi->stripes[0].dev;
+
+		if (!device->desc || !device->part) {
+			kfree(multi);
+			return -EIO;
+		}
+
+		if (read_len > bytes_left)
+			read_len = bytes_left;
+
+		ret = read_extent_from_disk(device->desc, device->part,
+					    multi->stripes[0].physical, eb,
+					    offset, read_len);
+		kfree(multi);
+		multi = NULL;
+
+		if (ret)
+			return -EIO;
+		offset += read_len;
+		bytes_left -= read_len;
+	}
+	return 0;
+}
+
+struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+		u64 parent_transid)
+{
+	int ret;
+	struct extent_buffer *eb;
+	u64 best_transid = 0;
+	u32 sectorsize = fs_info->sectorsize;
+	int mirror_num = 1;
+	int good_mirror = 0;
+	int candidate_mirror = 0;
+	int num_copies;
+	int ignore = 0;
+
+	/*
+	 * Don't even try to create tree block for unaligned tree block
+	 * bytenr.
+	 * Such unaligned tree block will free overlapping extent buffer,
+	 * causing use-after-free bugs for fuzzed images.
+	 */
+	if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
+		error("tree block bytenr %llu is not aligned to sectorsize %u",
+		      bytenr, sectorsize);
+		return ERR_PTR(-EIO);
+	}
+
+	eb = btrfs_find_create_tree_block(fs_info, bytenr);
+	if (!eb)
+		return ERR_PTR(-ENOMEM);
+
+	if (btrfs_buffer_uptodate(eb, parent_transid))
+		return eb;
+
+	num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
+	while (1) {
+		ret = read_whole_eb(fs_info, eb, mirror_num);
+		if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
+		    check_tree_block(fs_info, eb) == 0 &&
+		    verify_parent_transid(&fs_info->extent_cache, eb,
+					  parent_transid, ignore) == 0) {
+			/*
+			 * check_tree_block() is less strict to allow btrfs
+			 * check to get raw eb with bad key order and fix it.
+			 * But we still need to try to get a good copy if
+			 * possible, or bad key order can go into tools like
+			 * btrfs ins dump-tree.
+			 */
+			if (btrfs_header_level(eb))
+				ret = btrfs_check_node(fs_info, NULL, eb);
+			else
+				ret = btrfs_check_leaf(fs_info, NULL, eb);
+			if (!ret || candidate_mirror == mirror_num) {
+				btrfs_set_buffer_uptodate(eb);
+				return eb;
+			}
+			if (candidate_mirror <= 0)
+				candidate_mirror = mirror_num;
+		}
+		if (ignore) {
+			if (candidate_mirror > 0) {
+				mirror_num = candidate_mirror;
+				continue;
+			}
+			if (check_tree_block(fs_info, eb))
+				print_tree_block_error(fs_info, eb,
+						check_tree_block(fs_info, eb));
+			else
+				fprintf(stderr, "Csum didn't match\n");
+			ret = -EIO;
+			break;
+		}
+		if (num_copies == 1) {
+			ignore = 1;
+			continue;
+		}
+		if (btrfs_header_generation(eb) > best_transid) {
+			best_transid = btrfs_header_generation(eb);
+			good_mirror = mirror_num;
+		}
+		mirror_num++;
+		if (mirror_num > num_copies) {
+			if (candidate_mirror > 0)
+				mirror_num = candidate_mirror;
+			else
+				mirror_num = good_mirror;
+			ignore = 1;
+			continue;
+		}
+	}
+	/*
+	 * We failed to read this tree block, it be should deleted right now
+	 * to avoid stale cache populate the cache.
+	 */
+	free_extent_buffer(eb);
+	return ERR_PTR(ret);
+}
+
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+		     u64 *len, int mirror)
+{
+	u64 offset = 0;
+	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_device *device;
+	int ret = 0;
+	u64 max_len = *len;
+
+	ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
+			      NULL);
+	if (ret) {
+		fprintf(stderr, "Couldn't map the block %llu\n",
+				logical + offset);
+		goto err;
+	}
+	device = multi->stripes[0].dev;
+
+	if (*len > max_len)
+		*len = max_len;
+	if (!device->desc || !device->part) {
+		ret = -EIO;
+		goto err;
+	}
+
+	ret = __btrfs_devread(device->desc, device->part, data, *len,
+			      multi->stripes[0].physical);
+	if (ret != *len)
+		ret = -EIO;
+	else
+		ret = 0;
+err:
+	kfree(multi);
+	return ret;
+}
+
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+		      u64 objectid)
+{
+	root->node = NULL;
+	root->track_dirty = 0;
+
+	root->fs_info = fs_info;
+	root->objectid = objectid;
+	root->last_trans = 0;
+	root->last_inode_alloc = 0;
+
+	memset(&root->root_key, 0, sizeof(root->root_key));
+	memset(&root->root_item, 0, sizeof(root->root_item));
+	root->root_key.objectid = objectid;
+}
+
+static int find_and_setup_root(struct btrfs_root *tree_root,
+			       struct btrfs_fs_info *fs_info,
+			       u64 objectid, struct btrfs_root *root)
+{
+	int ret;
+	u64 generation;
+
+	btrfs_setup_root(root, fs_info, objectid);
+	ret = btrfs_find_last_root(tree_root, objectid,
+				   &root->root_item, &root->root_key);
+	if (ret)
+		return ret;
+
+	generation = btrfs_root_generation(&root->root_item);
+	root->node = read_tree_block(fs_info,
+			btrfs_root_bytenr(&root->root_item), generation);
+	if (!extent_buffer_uptodate(root->node))
+		return -EIO;
+
+	return 0;
+}
+
+int btrfs_free_fs_root(struct btrfs_root *root)
+{
+	if (root->node)
+		free_extent_buffer(root->node);
+	kfree(root);
+	return 0;
+}
+
+static void __free_fs_root(struct rb_node *node)
+{
+	struct btrfs_root *root;
+
+	root = container_of(node, struct btrfs_root, rb_node);
+	btrfs_free_fs_root(root);
+}
+
+FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
+
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+					       struct btrfs_key *location)
+{
+	struct btrfs_root *root;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_path *path;
+	struct extent_buffer *l;
+	u64 generation;
+	int ret = 0;
+
+	root = calloc(1, sizeof(*root));
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+	if (location->offset == (u64)-1) {
+		ret = find_and_setup_root(tree_root, fs_info,
+					  location->objectid, root);
+		if (ret) {
+			free(root);
+			return ERR_PTR(ret);
+		}
+		goto insert;
+	}
+
+	btrfs_setup_root(root, fs_info,
+			 location->objectid);
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		free(root);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
+	if (ret != 0) {
+		if (ret > 0)
+			ret = -ENOENT;
+		goto out;
+	}
+	l = path->nodes[0];
+	read_extent_buffer(l, &root->root_item,
+	       btrfs_item_ptr_offset(l, path->slots[0]),
+	       sizeof(root->root_item));
+	memcpy(&root->root_key, location, sizeof(*location));
+
+	/* If this root is already an orphan, no need to read */
+	if (btrfs_root_refs(&root->root_item) == 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	if (ret) {
+		free(root);
+		return ERR_PTR(ret);
+	}
+	generation = btrfs_root_generation(&root->root_item);
+	root->node = read_tree_block(fs_info,
+			btrfs_root_bytenr(&root->root_item), generation);
+	if (!extent_buffer_uptodate(root->node)) {
+		free(root);
+		return ERR_PTR(-EIO);
+	}
+insert:
+	root->ref_cows = 1;
+	return root;
+}
+
+static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
+					    void *data)
+{
+	u64 objectid = *((u64 *)data);
+	struct btrfs_root *root;
+
+	root = rb_entry(node, struct btrfs_root, rb_node);
+	if (objectid > root->objectid)
+		return 1;
+	else if (objectid < root->objectid)
+		return -1;
+	else
+		return 0;
+}
+
+int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
+{
+	struct btrfs_root *root;
+
+	root = rb_entry(node2, struct btrfs_root, rb_node);
+	return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
+}
+
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+				      struct btrfs_key *location)
+{
+	struct btrfs_root *root;
+	struct rb_node *node;
+	int ret;
+	u64 objectid = location->objectid;
+
+	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
+		return fs_info->tree_root;
+	if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
+		return fs_info->chunk_root;
+	if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
+		return fs_info->csum_root;
+	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
+	       location->offset != (u64)-1);
+
+	node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
+			 btrfs_fs_roots_compare_objectids, NULL);
+	if (node)
+		return container_of(node, struct btrfs_root, rb_node);
+
+	root = btrfs_read_fs_root_no_cache(fs_info, location);
+	if (IS_ERR(root))
+		return root;
+
+	ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
+			btrfs_fs_roots_compare_roots);
+	BUG_ON(ret);
+	return root;
+}
+
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
+{
+	free(fs_info->tree_root);
+	free(fs_info->chunk_root);
+	free(fs_info->csum_root);
+	free(fs_info->super_copy);
+	free(fs_info);
+}
+
+struct btrfs_fs_info *btrfs_new_fs_info(void)
+{
+	struct btrfs_fs_info *fs_info;
+
+	fs_info = calloc(1, sizeof(struct btrfs_fs_info));
+	if (!fs_info)
+		return NULL;
+
+	fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
+	fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
+
+	if (!fs_info->tree_root || !fs_info->chunk_root ||
+	    !fs_info->csum_root || !fs_info->super_copy)
+		goto free_all;
+
+	extent_io_tree_init(&fs_info->extent_cache);
+
+	fs_info->fs_root_tree = RB_ROOT;
+	cache_tree_init(&fs_info->mapping_tree.cache_tree);
+
+	mutex_init(&fs_info->fs_mutex);
+
+	return fs_info;
+free_all:
+	btrfs_free_fs_info(fs_info);
+	return NULL;
+}
+
+static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
+				      struct btrfs_root *info_root,
+				      u64 objectid, char *str)
+{
+	struct btrfs_root *root = fs_info->tree_root;
+	int ret;
+
+	ret = find_and_setup_root(root, fs_info, objectid, info_root);
+	if (ret) {
+		error("could not setup %s tree", str);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	u64 root_tree_bytenr;
+	u64 generation;
+	int ret;
+
+	root = fs_info->tree_root;
+	btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
+	generation = btrfs_super_generation(sb);
+
+	root_tree_bytenr = btrfs_super_root(sb);
+
+	root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
+	if (!extent_buffer_uptodate(root->node)) {
+		fprintf(stderr, "Couldn't read tree root\n");
+		return -EIO;
+	}
+
+	ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
+					 BTRFS_CSUM_TREE_OBJECTID, "csum");
+	if (ret)
+		return ret;
+	fs_info->csum_root->track_dirty = 1;
+
+	fs_info->last_trans_committed = generation;
+
+	key.objectid = BTRFS_FS_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
+
+	if (IS_ERR(fs_info->fs_root))
+		return -EIO;
+	return 0;
+}
+
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
+{
+	if (fs_info->csum_root)
+		free_extent_buffer(fs_info->csum_root->node);
+	if (fs_info->tree_root)
+		free_extent_buffer(fs_info->tree_root->node);
+	if (fs_info->chunk_root)
+		free_extent_buffer(fs_info->chunk_root->node);
+}
+
+static void free_map_lookup(struct cache_extent *ce)
+{
+	struct map_lookup *map;
+
+	map = container_of(ce, struct map_lookup, ce);
+	kfree(map);
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
+
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
+{
+	free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
+	extent_io_tree_cleanup(&fs_info->extent_cache);
+}
+
+static int btrfs_scan_fs_devices(struct blk_desc *desc,
+				 struct disk_partition *part,
+				 struct btrfs_fs_devices **fs_devices)
+{
+	u64 total_devs;
+	int ret;
+
+	if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
+		     desc->blksz) > (part->size << desc->log2blksz)) {
+		error("superblock end %u is larger than device size " LBAFU,
+				BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
+				part->size << desc->log2blksz);
+		return -EINVAL;
+	}
+
+	ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
+	if (ret) {
+		fprintf(stderr, "No valid Btrfs found\n");
+		return ret;
+	}
+	return 0;
+}
+
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
+{
+	u64 features;
+
+	features = btrfs_super_incompat_flags(sb) &
+		   ~BTRFS_FEATURE_INCOMPAT_SUPP;
+	if (features) {
+		printk("couldn't open because of unsupported "
+		       "option features (%llx).\n",
+		       (unsigned long long)features);
+		return -ENOTSUPP;
+	}
+
+	features = btrfs_super_incompat_flags(sb);
+	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+		btrfs_set_super_incompat_flags(sb, features);
+	}
+
+	return 0;
+}
+
+static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	u64 chunk_root_bytenr;
+	u64 generation;
+	int ret;
+
+	btrfs_setup_root(fs_info->chunk_root, fs_info,
+			BTRFS_CHUNK_TREE_OBJECTID);
+
+	ret = btrfs_read_sys_array(fs_info);
+	if (ret)
+		return ret;
+
+	generation = btrfs_super_chunk_root_generation(sb);
+	chunk_root_bytenr = btrfs_super_chunk_root(sb);
+
+	fs_info->chunk_root->node = read_tree_block(fs_info,
+						    chunk_root_bytenr,
+						    generation);
+	if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
+		error("cannot read chunk root");
+		return -EIO;
+	}
+
+	ret = btrfs_read_chunk_tree(fs_info);
+	if (ret) {
+		fprintf(stderr, "Couldn't read chunk tree\n");
+		return ret;
+	}
+	return 0;
+}
+
+struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
+					 struct disk_partition *part)
+{
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_super_block *disk_super;
+	struct btrfs_fs_devices *fs_devices = NULL;
+	struct extent_buffer *eb;
+	int ret;
+
+	fs_info = btrfs_new_fs_info();
+	if (!fs_info) {
+		fprintf(stderr, "Failed to allocate memory for fs_info\n");
+		return NULL;
+	}
+
+	ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
+	if (ret)
+		goto out;
+
+	fs_info->fs_devices = fs_devices;
+
+	ret = btrfs_open_devices(fs_devices);
+	if (ret)
+		goto out;
+
+	disk_super = fs_info->super_copy;
+	ret = btrfs_read_dev_super(desc, part, disk_super);
+	if (ret) {
+		printk("No valid btrfs found\n");
+		goto out_devices;
+	}
+
+	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
+		fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
+		goto out_devices;
+	}
+
+	ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
+	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
+		ASSERT(!memcmp(disk_super->metadata_uuid,
+			       fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
+
+	fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
+	fs_info->nodesize = btrfs_super_nodesize(disk_super);
+	fs_info->stripesize = btrfs_super_stripesize(disk_super);
+
+	ret = btrfs_check_fs_compatibility(fs_info->super_copy);
+	if (ret)
+		goto out_devices;
+
+	ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
+	if (ret)
+		goto out_chunk;
+
+	/* Chunk tree root is unable to read, return directly */
+	if (!fs_info->chunk_root)
+		return fs_info;
+
+	eb = fs_info->chunk_root->node;
+	read_extent_buffer(eb, fs_info->chunk_tree_uuid,
+			   btrfs_header_chunk_tree_uuid(eb),
+			   BTRFS_UUID_SIZE);
+
+	ret = btrfs_setup_all_roots(fs_info);
+	if (ret)
+		goto out_chunk;
+
+	return fs_info;
+
+out_chunk:
+	btrfs_release_all_roots(fs_info);
+	btrfs_cleanup_all_caches(fs_info);
+out_devices:
+	btrfs_close_devices(fs_devices);
+out:
+	btrfs_free_fs_info(fs_info);
+	return NULL;
+}
+
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
+{
+	int ret;
+	int err = 0;
+
+	free_fs_roots_tree(&fs_info->fs_root_tree);
+
+	btrfs_release_all_roots(fs_info);
+	ret = btrfs_close_devices(fs_info->fs_devices);
+	btrfs_cleanup_all_caches(fs_info);
+	btrfs_free_fs_info(fs_info);
+	if (!err)
+		err = ret;
+	return err;
+}
+
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+{
+	int ret;
+
+	ret = extent_buffer_uptodate(buf);
+	if (!ret)
+		return ret;
+
+	ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
+				    parent_transid, 1);
+	return !ret;
+}
+
+int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
+{
+	return set_extent_buffer_uptodate(eb);
+}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
new file mode 100644
index 00000000000..a3479120782
--- /dev/null
+++ b/fs/btrfs/disk-io.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0+
+#ifndef __BTRFS_DISK_IO_H__
+#define __BTRFS_DISK_IO_H__
+
+#include <linux/sizes.h>
+#include <fs_internal.h>
+#include "ctree.h"
+#include "disk-io.h"
+
+#define BTRFS_SUPER_INFO_OFFSET SZ_64K
+#define BTRFS_SUPER_INFO_SIZE	SZ_4K
+
+/* From btrfs-progs */
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror);
+struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+		u64 parent_transid);
+
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+		     u64 *len, int mirror);
+struct extent_buffer* btrfs_find_create_tree_block(
+		struct btrfs_fs_info *fs_info, u64 bytenr);
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+					    u64 bytenr, u32 blocksize);
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+					       struct btrfs_key *location);
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+				      struct btrfs_key *location);
+
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+		      u64 objectid);
+
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
+struct btrfs_fs_info *btrfs_new_fs_info(void);
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb);
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info);
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info);
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info);
+
+struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
+					 struct disk_partition *part);
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info);
+
+int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
+			 struct btrfs_super_block *sb);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
+int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len);
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_sectorsize,
+			 int verify, u16 csum_type);
+#endif
diff --git a/fs/btrfs/extent-cache.c b/fs/btrfs/extent-cache.c
new file mode 100644
index 00000000000..bc8cf3a5227
--- /dev/null
+++ b/fs/btrfs/extent-cache.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <stdlib.h>
+#include "extent-cache.h"
+#include "common/rbtree-utils.h"
+
+struct cache_extent_search_range {
+	u64 objectid;
+	u64 start;
+	u64 size;
+};
+
+static int cache_tree_comp_range(struct rb_node *node, void *data)
+{
+	struct cache_extent *entry;
+	struct cache_extent_search_range *range;
+
+	range = (struct cache_extent_search_range *)data;
+	entry = rb_entry(node, struct cache_extent, rb_node);
+
+	if (entry->start + entry->size <= range->start)
+		return 1;
+	else if (range->start + range->size <= entry->start)
+		return -1;
+	else
+		return 0;
+}
+
+static int cache_tree_comp_nodes(struct rb_node *node1, struct rb_node *node2)
+{
+	struct cache_extent *entry;
+	struct cache_extent_search_range range;
+
+	entry = rb_entry(node2, struct cache_extent, rb_node);
+	range.start = entry->start;
+	range.size = entry->size;
+
+	return cache_tree_comp_range(node1, (void *)&range);
+}
+
+static int cache_tree_comp_range2(struct rb_node *node, void *data)
+{
+	struct cache_extent *entry;
+	struct cache_extent_search_range *range;
+
+	range = (struct cache_extent_search_range *)data;
+	entry = rb_entry(node, struct cache_extent, rb_node);
+
+	if (entry->objectid < range->objectid)
+		return 1;
+	else if (entry->objectid > range->objectid)
+		return -1;
+	else if (entry->start + entry->size <= range->start)
+		return 1;
+	else if (range->start + range->size <= entry->start)
+		return -1;
+	else
+		return 0;
+}
+
+static int cache_tree_comp_nodes2(struct rb_node *node1, struct rb_node *node2)
+{
+	struct cache_extent *entry;
+	struct cache_extent_search_range range;
+
+	entry = rb_entry(node2, struct cache_extent, rb_node);
+	range.objectid = entry->objectid;
+	range.start = entry->start;
+	range.size = entry->size;
+
+	return cache_tree_comp_range2(node1, (void *)&range);
+}
+
+void cache_tree_init(struct cache_tree *tree)
+{
+	tree->root = RB_ROOT;
+}
+
+static struct cache_extent *alloc_cache_extent(u64 start, u64 size)
+{
+	struct cache_extent *pe = malloc(sizeof(*pe));
+
+	if (!pe)
+		return pe;
+
+	pe->objectid = 0;
+	pe->start = start;
+	pe->size = size;
+	return pe;
+}
+
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+	struct cache_extent *pe = alloc_cache_extent(start, size);
+	int ret;
+
+	if (!pe)
+		return -ENOMEM;
+
+	ret = insert_cache_extent(tree, pe);
+	if (ret)
+		free(pe);
+
+	return ret;
+}
+
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+	return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes);
+}
+
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe)
+{
+	return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes2);
+}
+
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+					 u64 start, u64 size)
+{
+	struct rb_node *node;
+	struct cache_extent *entry;
+	struct cache_extent_search_range range;
+
+	range.start = start;
+	range.size = size;
+	node = rb_search(&tree->root, &range, cache_tree_comp_range, NULL);
+	if (!node)
+		return NULL;
+
+	entry = rb_entry(node, struct cache_extent, rb_node);
+	return entry;
+}
+
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+					 u64 objectid, u64 start, u64 size)
+{
+	struct rb_node *node;
+	struct cache_extent *entry;
+	struct cache_extent_search_range range;
+
+	range.objectid = objectid;
+	range.start = start;
+	range.size = size;
+	node = rb_search(&tree->root, &range, cache_tree_comp_range2, NULL);
+	if (!node)
+		return NULL;
+
+	entry = rb_entry(node, struct cache_extent, rb_node);
+	return entry;
+}
+
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start)
+{
+	struct rb_node *next;
+	struct rb_node *node;
+	struct cache_extent *entry;
+	struct cache_extent_search_range range;
+
+	range.start = start;
+	range.size = 1;
+	node = rb_search(&tree->root, &range, cache_tree_comp_range, &next);
+	if (!node)
+		node = next;
+	if (!node)
+		return NULL;
+
+	entry = rb_entry(node, struct cache_extent, rb_node);
+	return entry;
+}
+
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+					 u64 objectid, u64 start)
+{
+	struct rb_node *next;
+	struct rb_node *node;
+	struct cache_extent *entry;
+	struct cache_extent_search_range range;
+
+	range.objectid = objectid;
+	range.start = start;
+	range.size = 1;
+	node = rb_search(&tree->root, &range, cache_tree_comp_range2, &next);
+	if (!node)
+		node = next;
+	if (!node)
+		return NULL;
+
+	entry = rb_entry(node, struct cache_extent, rb_node);
+	return entry;
+}
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree)
+{
+	struct rb_node *node = rb_first(&tree->root);
+
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct cache_extent, rb_node);
+}
+
+struct cache_extent *last_cache_extent(struct cache_tree *tree)
+{
+	struct rb_node *node = rb_last(&tree->root);
+
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct cache_extent, rb_node);
+}
+
+struct cache_extent *prev_cache_extent(struct cache_extent *pe)
+{
+	struct rb_node *node = rb_prev(&pe->rb_node);
+
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct cache_extent, rb_node);
+}
+
+struct cache_extent *next_cache_extent(struct cache_extent *pe)
+{
+	struct rb_node *node = rb_next(&pe->rb_node);
+
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct cache_extent, rb_node);
+}
+
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+	rb_erase(&pe->rb_node, &tree->root);
+}
+
+void cache_tree_free_extents(struct cache_tree *tree,
+			     free_cache_extent free_func)
+{
+	struct cache_extent *ce;
+
+	while ((ce = first_cache_extent(tree))) {
+		remove_cache_extent(tree, ce);
+		free_func(ce);
+	}
+}
+
+static void free_extent_cache(struct cache_extent *pe)
+{
+	free(pe);
+}
+
+void free_extent_cache_tree(struct cache_tree *tree)
+{
+	cache_tree_free_extents(tree, free_extent_cache);
+}
+
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+	struct cache_extent *cache;
+	struct cache_extent *next = NULL;
+	struct cache_extent *prev = NULL;
+	int next_merged = 0;
+	int prev_merged = 0;
+	int ret = 0;
+
+	if (cache_tree_empty(tree))
+		goto insert;
+
+	cache = search_cache_extent(tree, start);
+	if (!cache) {
+		/*
+		 * Either the tree is completely empty, or the no range after
+		 * start.
+		 * Either way, the last cache_extent should be prev.
+		 */
+		prev = last_cache_extent(tree);
+	} else if (start <= cache->start) {
+		next = cache;
+		prev = prev_cache_extent(cache);
+	} else {
+		prev = cache;
+		next = next_cache_extent(cache);
+	}
+
+	/*
+	 * Ensure the range to be inserted won't cover with existings
+	 * Or we will need extra loop to do merge
+	 */
+	BUG_ON(next && start + size > next->start);
+	BUG_ON(prev && prev->start + prev->size > start);
+
+	if (next && start + size == next->start) {
+		next_merged = 1;
+		next->size = next->start + next->size - start;
+		next->start = start;
+	}
+	if (prev && prev->start + prev->size == start) {
+		prev_merged = 1;
+		if (next_merged) {
+			next->size = next->start + next->size - prev->start;
+			next->start = prev->start;
+			remove_cache_extent(tree, prev);
+			free(prev);
+		} else {
+			prev->size = start + size - prev->start;
+		}
+	}
+insert:
+	if (!prev_merged && !next_merged)
+		ret = add_cache_extent(tree, start, size);
+	return ret;
+}
diff --git a/fs/btrfs/extent-cache.h b/fs/btrfs/extent-cache.h
new file mode 100644
index 00000000000..2fee81a66e1
--- /dev/null
+++ b/fs/btrfs/extent-cache.h
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#ifndef __BTRFS_EXTENT_CACHE_H__
+#define __BTRFS_EXTENT_CACHE_H__
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct cache_tree {
+	struct rb_root root;
+};
+
+struct cache_extent {
+	struct rb_node rb_node;
+	u64 objectid;
+	u64 start;
+	u64 size;
+};
+
+void cache_tree_init(struct cache_tree *tree);
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree);
+struct cache_extent *last_cache_extent(struct cache_tree *tree);
+struct cache_extent *prev_cache_extent(struct cache_extent *pe);
+struct cache_extent *next_cache_extent(struct cache_extent *pe);
+
+/*
+ * Find a cache_extent which covers start.
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start);
+
+/*
+ * Find a cache_extent which restrictly covers start.
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+					 u64 start, u64 size);
+
+/*
+ * Add an non-overlap extent into cache tree
+ *
+ * If [start, start+size) overlap with existing one, it will return -EEXIST.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+/*
+ * Same with add_cache_extent, but with cache_extent strcut.
+ */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+
+static inline int cache_tree_empty(struct cache_tree *tree)
+{
+	return RB_EMPTY_ROOT(&tree->root);
+}
+
+typedef void (*free_cache_extent)(struct cache_extent *pe);
+
+void cache_tree_free_extents(struct cache_tree *tree,
+			     free_cache_extent free_func);
+
+#define FREE_EXTENT_CACHE_BASED_TREE(name, free_func)		\
+static void free_##name##_tree(struct cache_tree *tree)		\
+{								\
+	cache_tree_free_extents(tree, free_func);		\
+}
+
+void free_extent_cache_tree(struct cache_tree *tree);
+
+/*
+ * Search a cache_extent with same objectid, and covers start.
+ *
+ * If not found, return next if possible.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+					  u64 objectid, u64 start);
+/*
+ * Search a cache_extent with same objectid, and covers the range
+ * [start, start + size)
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+					  u64 objectid, u64 start, u64 size);
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe);
+
+/*
+ * Insert a cache_extent range [start, start + size).
+ *
+ * This function may merge with existing cache_extent.
+ * NOTE: caller must ensure the inserted range won't cover with any existing
+ * range.
+ */
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+#endif
diff --git a/fs/btrfs/extent-io.c b/fs/btrfs/extent-io.c
index 2e4599cf64a..774e29eb602 100644
--- a/fs/btrfs/extent-io.c
+++ b/fs/btrfs/extent-io.c
@@ -5,122 +5,805 @@
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
+#include <linux/kernel.h>
+#include <linux/bug.h>
 #include <malloc.h>
 #include <memalign.h>
+#include "btrfs.h"
+#include "ctree.h"
+#include "extent-io.h"
+#include "disk-io.h"
+
+void extent_io_tree_init(struct extent_io_tree *tree)
+{
+	cache_tree_init(&tree->state);
+	cache_tree_init(&tree->cache);
+	tree->cache_size = 0;
+}
+
+static struct extent_state *alloc_extent_state(void)
+{
+	struct extent_state *state;
+
+	state = malloc(sizeof(*state));
+	if (!state)
+		return NULL;
+	state->cache_node.objectid = 0;
+	state->refs = 1;
+	state->state = 0;
+	state->xprivate = 0;
+	return state;
+}
+
+static void btrfs_free_extent_state(struct extent_state *state)
+{
+	state->refs--;
+	BUG_ON(state->refs < 0);
+	if (state->refs == 0)
+		free(state);
+}
 
-u64 btrfs_read_extent_inline(struct btrfs_path *path,
-			     struct btrfs_file_extent_item *extent, u64 offset,
-			     u64 size, char *out)
+static void free_extent_state_func(struct cache_extent *cache)
 {
-	u32 clen, dlen, orig_size = size, res;
-	const char *cbuf;
-	char *dbuf;
-	const int data_off = offsetof(struct btrfs_file_extent_item,
-				      disk_bytenr);
+	struct extent_state *es;
+
+	es = container_of(cache, struct extent_state, cache_node);
+	btrfs_free_extent_state(es);
+}
 
-	clen = btrfs_path_item_size(path) - data_off;
-	cbuf = (const char *) extent + data_off;
-	dlen = extent->ram_bytes;
+static void free_extent_buffer_final(struct extent_buffer *eb);
+void extent_io_tree_cleanup(struct extent_io_tree *tree)
+{
+	cache_tree_free_extents(&tree->state, free_extent_state_func);
+}
 
-	if (offset > dlen)
-		return -1ULL;
+static inline void update_extent_state(struct extent_state *state)
+{
+	state->cache_node.start = state->start;
+	state->cache_node.size = state->end + 1 - state->start;
+}
+
+/*
+ * Utility function to look for merge candidates inside a given range.
+ * Any extents with matching state are merged together into a single
+ * extent in the tree. Extents with EXTENT_IO in their state field are
+ * not merged
+ */
+static int merge_state(struct extent_io_tree *tree,
+		       struct extent_state *state)
+{
+	struct extent_state *other;
+	struct cache_extent *other_node;
 
-	if (size > dlen - offset)
-		size = dlen - offset;
+	if (state->state & EXTENT_IOBITS)
+		return 0;
 
-	if (extent->compression == BTRFS_COMPRESS_NONE) {
-		memcpy(out, cbuf + offset, size);
-		return size;
+	other_node = prev_cache_extent(&state->cache_node);
+	if (other_node) {
+		other = container_of(other_node, struct extent_state,
+				     cache_node);
+		if (other->end == state->start - 1 &&
+		    other->state == state->state) {
+			state->start = other->start;
+			update_extent_state(state);
+			remove_cache_extent(&tree->state, &other->cache_node);
+			btrfs_free_extent_state(other);
+		}
 	}
+	other_node = next_cache_extent(&state->cache_node);
+	if (other_node) {
+		other = container_of(other_node, struct extent_state,
+				     cache_node);
+		if (other->start == state->end + 1 &&
+		    other->state == state->state) {
+			other->start = state->start;
+			update_extent_state(other);
+			remove_cache_extent(&tree->state, &state->cache_node);
+			btrfs_free_extent_state(state);
+		}
+	}
+	return 0;
+}
+
+/*
+ * insert an extent_state struct into the tree.  'bits' are set on the
+ * struct before it is inserted.
+ */
+static int insert_state(struct extent_io_tree *tree,
+			struct extent_state *state, u64 start, u64 end,
+			int bits)
+{
+	int ret;
+
+	BUG_ON(end < start);
+	state->state |= bits;
+	state->start = start;
+	state->end = end;
+	update_extent_state(state);
+	ret = insert_cache_extent(&tree->state, &state->cache_node);
+	BUG_ON(ret);
+	merge_state(tree, state);
+	return 0;
+}
+
+/*
+ * split a given extent state struct in two, inserting the preallocated
+ * struct 'prealloc' as the newly created second half.  'split' indicates an
+ * offset inside 'orig' where it should be split.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+		       struct extent_state *prealloc, u64 split)
+{
+	int ret;
+	prealloc->start = orig->start;
+	prealloc->end = split - 1;
+	prealloc->state = orig->state;
+	update_extent_state(prealloc);
+	orig->start = split;
+	update_extent_state(orig);
+	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
+	BUG_ON(ret);
+	return 0;
+}
+
+/*
+ * clear some bits on a range in the tree.
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+			    struct extent_state *state, int bits)
+{
+	int ret = state->state & bits;
 
-	if (dlen > orig_size) {
-		dbuf = malloc(dlen);
-		if (!dbuf)
-			return -1ULL;
+	state->state &= ~bits;
+	if (state->state == 0) {
+		remove_cache_extent(&tree->state, &state->cache_node);
+		btrfs_free_extent_state(state);
 	} else {
-		dbuf = out;
+		merge_state(tree, state);
+	}
+	return ret;
+}
+
+/*
+ * extent_buffer_bitmap_set - set an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to set
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+			      unsigned long pos, unsigned long len)
+{
+	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
+	const unsigned int size = pos + len;
+	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+
+	while (len >= bits_to_set) {
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_BYTE;
+		mask_to_set = ~0;
+		p++;
+	}
+	if (len) {
+		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+
+/*
+ * extent_buffer_bitmap_clear - clear an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to clear
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+				unsigned long pos, unsigned long len)
+{
+	u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
+	const unsigned int size = pos + len;
+	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+
+	while (len >= bits_to_clear) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_BYTE;
+		mask_to_clear = ~0;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+/*
+ * clear some bits on a range in the tree.
+ */
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct cache_extent *node;
+	u64 last_end;
+	int err;
+	int set = 0;
+
+again:
+	if (!prealloc) {
+		prealloc = alloc_extent_state();
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	/*
+	 * this search will find the extents that end after
+	 * our range starts
+	 */
+	node = search_cache_extent(&tree->state, start);
+	if (!node)
+		goto out;
+	state = container_of(node, struct extent_state, cache_node);
+	if (state->start > end)
+		goto out;
+	last_end = state->end;
+
+	/*
+	 *     | ---- desired range ---- |
+	 *  | state | or
+	 *  | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip
+	 * bits on second half.
+	 *
+	 * If the extent we found extends past our range, we
+	 * just split and search again.  It'll get split again
+	 * the next time though.
+	 *
+	 * If the extent we found is inside our range, we clear
+	 * the desired bit on it.
+	 */
+	if (state->start < start) {
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			set |= clear_state_bit(tree, state, bits);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *                        | state |
+	 * We need to split the extent, and clear the bit
+	 * on the first half
+	 */
+	if (state->start <= end && state->end > end) {
+		err = split_state(tree, state, prealloc, end + 1);
+		BUG_ON(err == -EEXIST);
+
+		set |= clear_state_bit(tree, prealloc, bits);
+		prealloc = NULL;
+		goto out;
+	}
+
+	start = state->end + 1;
+	set |= clear_state_bit(tree, state, bits);
+	if (last_end == (u64)-1)
+		goto out;
+	start = last_end + 1;
+	goto search_again;
+out:
+	if (prealloc)
+		btrfs_free_extent_state(prealloc);
+	return set;
+
+search_again:
+	if (start > end)
+		goto out;
+	goto again;
+}
+
+/*
+ * set some bits on a range in the tree.
+ */
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct cache_extent *node;
+	int err = 0;
+	u64 last_start;
+	u64 last_end;
+again:
+	if (!prealloc) {
+		prealloc = alloc_extent_state();
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	/*
+	 * this search will find the extents that end after
+	 * our range starts
+	 */
+	node = search_cache_extent(&tree->state, start);
+	if (!node) {
+		err = insert_state(tree, prealloc, start, end, bits);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		goto out;
+	}
+
+	state = container_of(node, struct extent_state, cache_node);
+	last_start = state->start;
+	last_end = state->end;
+
+	/*
+	 * | ---- desired range ---- |
+	 * | state |
+	 *
+	 * Just lock what we found and keep going
+	 */
+	if (state->start == start && state->end <= end) {
+		state->state |= bits;
+		merge_state(tree, state);
+		if (last_end == (u64)-1)
+			goto out;
+		start = last_end + 1;
+		goto search_again;
+	}
+	/*
+	 *     | ---- desired range ---- |
+	 * | state |
+	 *   or
+	 * | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip bits on
+	 * second half.
+	 *
+	 * If the extent we found extends past our
+	 * range, we just split and search again.  It'll get split
+	 * again the next time though.
+	 *
+	 * If the extent we found is inside our range, we set the
+	 * desired bit on it.
+	 */
+	if (state->start < start) {
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			state->state |= bits;
+			start = state->end + 1;
+			merge_state(tree, state);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *     | state | or               | state |
+	 *
+	 * There's a hole, we need to insert something in it and
+	 * ignore the extent we found.
+	 */
+	if (state->start > start) {
+		u64 this_end;
+		if (end < last_start)
+			this_end = end;
+		else
+			this_end = last_start -1;
+		err = insert_state(tree, prealloc, start, this_end,
+				bits);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		start = this_end + 1;
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 * | ---------- state ---------- |
+	 * We need to split the extent, and set the bit
+	 * on the first half
+	 */
+	err = split_state(tree, state, prealloc, end + 1);
+	BUG_ON(err == -EEXIST);
+
+	state->state |= bits;
+	merge_state(tree, prealloc);
+	prealloc = NULL;
+out:
+	if (prealloc)
+		btrfs_free_extent_state(prealloc);
+	return err;
+search_again:
+	if (start > end)
+		goto out;
+	goto again;
+}
+
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	return set_extent_bits(tree, start, end, EXTENT_DIRTY);
+}
+
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
+}
+
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			  u64 *start_ret, u64 *end_ret, int bits)
+{
+	struct cache_extent *node;
+	struct extent_state *state;
+	int ret = 1;
+
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = search_cache_extent(&tree->state, start);
+	if (!node)
+		goto out;
+
+	while(1) {
+		state = container_of(node, struct extent_state, cache_node);
+		if (state->end >= start && (state->state & bits)) {
+			*start_ret = state->start;
+			*end_ret = state->end;
+			ret = 0;
+			break;
+		}
+		node = next_cache_extent(node);
+		if (!node)
+			break;
+	}
+out:
+	return ret;
+}
+
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int filled)
+{
+	struct extent_state *state = NULL;
+	struct cache_extent *node;
+	int bitset = 0;
+
+	node = search_cache_extent(&tree->state, start);
+	while (node && start <= end) {
+		state = container_of(node, struct extent_state, cache_node);
+
+		if (filled && state->start > start) {
+			bitset = 0;
+			break;
+		}
+		if (state->start > end)
+			break;
+		if (state->state & bits) {
+			bitset = 1;
+			if (!filled)
+				break;
+		} else if (filled) {
+			bitset = 0;
+			break;
+		}
+		start = state->end + 1;
+		if (start > end)
+			break;
+		node = next_cache_extent(node);
+		if (!node) {
+			if (filled)
+				bitset = 0;
+			break;
+		}
+	}
+	return bitset;
+}
+
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+	struct cache_extent *node;
+	struct extent_state *state;
+	int ret = 0;
+
+	node = search_cache_extent(&tree->state, start);
+	if (!node) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state = container_of(node, struct extent_state, cache_node);
+	if (state->start != start) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state->xprivate = private;
+out:
+	return ret;
+}
+
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+	struct cache_extent *node;
+	struct extent_state *state;
+	int ret = 0;
+
+	node = search_cache_extent(&tree->state, start);
+	if (!node) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state = container_of(node, struct extent_state, cache_node);
+	if (state->start != start) {
+		ret = -ENOENT;
+		goto out;
+	}
+	*private = state->xprivate;
+out:
+	return ret;
+}
+
+static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
+						   u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *eb;
+
+	eb = calloc(1, sizeof(struct extent_buffer));
+	if (!eb)
+		return NULL;
+	eb->data = malloc_cache_aligned(blocksize);
+	if (!eb->data) {
+		free(eb);
+		return NULL;
+	}
+
+	eb->start = bytenr;
+	eb->len = blocksize;
+	eb->refs = 1;
+	eb->flags = 0;
+	eb->cache_node.start = bytenr;
+	eb->cache_node.size = blocksize;
+	eb->fs_info = info;
+	memset_extent_buffer(eb, 0, 0, blocksize);
+
+	return eb;
+}
+
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+	struct extent_buffer *new;
+
+	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
+	if (!new)
+		return NULL;
+
+	copy_extent_buffer(new, src, 0, 0, src->len);
+	new->flags |= EXTENT_BUFFER_DUMMY;
+
+	return new;
+}
+
+static void free_extent_buffer_final(struct extent_buffer *eb)
+{
+	BUG_ON(eb->refs);
+	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
+		struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+
+		remove_cache_extent(&tree->cache, &eb->cache_node);
+		BUG_ON(tree->cache_size < eb->len);
+		tree->cache_size -= eb->len;
 	}
+	free(eb->data);
+	free(eb);
+}
 
-	res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen);
-	if (res == -1 || res != dlen)
-		goto err;
+static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
+{
+	if (!eb || IS_ERR(eb))
+		return;
 
-	if (dlen > orig_size) {
-		memcpy(out, dbuf + offset, size);
-		free(dbuf);
-	} else if (offset) {
-		memmove(out, dbuf + offset, size);
+	eb->refs--;
+	BUG_ON(eb->refs < 0);
+	if (eb->refs == 0) {
+		if (eb->flags & EXTENT_DIRTY) {
+			error(
+			"dirty eb leak (aborted trans): start %llu len %u",
+				eb->start, eb->len);
+		}
+		if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
+			free_extent_buffer_final(eb);
 	}
+}
+
+void free_extent_buffer(struct extent_buffer *eb)
+{
+	free_extent_buffer_internal(eb, 1);
+}
 
-	return size;
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+					 u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *eb = NULL;
+	struct cache_extent *cache;
 
-err:
-	if (dlen > orig_size)
-		free(dbuf);
-	return -1ULL;
+	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+	if (cache && cache->start == bytenr &&
+	    cache->size == blocksize) {
+		eb = container_of(cache, struct extent_buffer, cache_node);
+		eb->refs++;
+	}
+	return eb;
 }
 
-u64 btrfs_read_extent_reg(struct btrfs_path *path,
-			  struct btrfs_file_extent_item *extent, u64 offset,
-			  u64 size, char *out)
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+					       u64 start)
 {
-	u64 physical, clen, dlen, orig_size = size;
-	u32 res;
-	char *cbuf, *dbuf;
+	struct extent_buffer *eb = NULL;
+	struct cache_extent *cache;
 
-	clen = extent->disk_num_bytes;
-	dlen = extent->num_bytes;
+	cache = search_cache_extent(&tree->cache, start);
+	if (cache) {
+		eb = container_of(cache, struct extent_buffer, cache_node);
+		eb->refs++;
+	}
+	return eb;
+}
 
-	if (offset > dlen)
-		return -1ULL;
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+					  u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *eb;
+	struct extent_io_tree *tree = &fs_info->extent_cache;
+	struct cache_extent *cache;
 
-	if (size > dlen - offset)
-		size = dlen - offset;
+	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+	if (cache && cache->start == bytenr &&
+	    cache->size == blocksize) {
+		eb = container_of(cache, struct extent_buffer, cache_node);
+		eb->refs++;
+	} else {
+		int ret;
 
-	/* sparse extent */
-	if (extent->disk_bytenr == 0) {
-		memset(out, 0, size);
-		return size;
+		if (cache) {
+			eb = container_of(cache, struct extent_buffer,
+					  cache_node);
+			free_extent_buffer(eb);
+		}
+		eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
+		if (!eb)
+			return NULL;
+		ret = insert_cache_extent(&tree->cache, &eb->cache_node);
+		if (ret) {
+			free(eb);
+			return NULL;
+		}
+		tree->cache_size += blocksize;
 	}
+	return eb;
+}
 
-	physical = btrfs_map_logical_to_physical(extent->disk_bytenr);
-	if (physical == -1ULL)
-		return -1ULL;
+/*
+ * Allocate a dummy extent buffer which won't be inserted into extent buffer
+ * cache.
+ *
+ * This mostly allows super block read write using existing eb infrastructure
+ * without pulluting the eb cache.
+ *
+ * This is especially important to avoid injecting eb->start == SZ_64K, as
+ * fuzzed image could have invalid tree bytenr covers super block range,
+ * and cause ref count underflow.
+ */
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+						u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *ret;
 
-	if (extent->compression == BTRFS_COMPRESS_NONE) {
-		physical += extent->offset + offset;
-		if (!btrfs_devread(physical, size, out))
-			return -1ULL;
+	ret = __alloc_extent_buffer(fs_info, bytenr, blocksize);
+	if (!ret)
+		return NULL;
+
+	ret->flags |= EXTENT_BUFFER_DUMMY;
+
+	return ret;
+}
+
+int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part,
+			  u64 physical, struct extent_buffer *eb,
+			  unsigned long offset, unsigned long len)
+{
+	int ret;
 
-		return size;
+	ret = __btrfs_devread(desc, part, eb->data + offset, len, physical);
+	if (ret < 0)
+		goto out;
+	if (ret != len) {
+		ret = -EIO;
+		goto out;
 	}
+	ret = 0;
+out:
+	return ret;
+}
+
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+			 unsigned long start, unsigned long len)
+{
+	return memcmp(eb->data + start, ptrv, len);
+}
 
-	cbuf = malloc_cache_aligned(dlen > size ? clen + dlen : clen);
-	if (!cbuf)
-		return -1ULL;
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+			unsigned long start, unsigned long len)
+{
+	memcpy(dst, eb->data + start, len);
+}
 
-	if (dlen > orig_size)
-		dbuf = cbuf + clen;
-	else
-		dbuf = out;
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+			 unsigned long start, unsigned long len)
+{
+	memcpy(eb->data + start, src, len);
+}
+
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len)
+{
+	memcpy(dst->data + dst_offset, src->data + src_offset, len);
+}
 
-	if (!btrfs_devread(physical, clen, cbuf))
-		goto err;
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	memmove(dst->data + dst_offset, dst->data + src_offset, len);
+}
 
-	res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen);
-	if (res == -1)
-		goto err;
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len)
+{
+	memset(eb->data + start, c, len);
+}
 
-	if (dlen > orig_size)
-		memcpy(out, dbuf + offset, size);
-	else
-		memmove(out, dbuf + offset, size);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+			   unsigned long nr)
+{
+	return le_test_bit(nr, (u8 *)eb->data + start);
+}
 
-	free(cbuf);
-	return res;
+int set_extent_buffer_dirty(struct extent_buffer *eb)
+{
+	struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+	if (!(eb->flags & EXTENT_DIRTY)) {
+		eb->flags |= EXTENT_DIRTY;
+		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
+		extent_buffer_get(eb);
+	}
+	return 0;
+}
 
-err:
-	free(cbuf);
-	return -1ULL;
+int clear_extent_buffer_dirty(struct extent_buffer *eb)
+{
+	struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+	if (eb->flags & EXTENT_DIRTY) {
+		eb->flags &= ~EXTENT_DIRTY;
+		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
+		free_extent_buffer(eb);
+	}
+	return 0;
 }
diff --git a/fs/btrfs/extent-io.h b/fs/btrfs/extent-io.h
new file mode 100644
index 00000000000..6b0c87da969
--- /dev/null
+++ b/fs/btrfs/extent-io.h
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from btrfs-progs/extent_io.h
+ *
+ * Modification includes:
+ * - extent_buffer:data
+ *   Use pointer to provide better alignment.
+ * - Remove max_cache_size related interfaces
+ *   Includes free_extent_buffer_nocache()
+ *   As we don't cache eb in U-boot.
+ * - Include headers
+ *
+ * Write related functions are kept as we still need to modify dummy extent
+ * buffers even in RO environment.
+ */
+#ifndef __BTRFS_EXTENT_IO_H__
+#define __BTRFS_EXTENT_IO_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <fs_internal.h>
+#include "extent-cache.h"
+
+#define EXTENT_DIRTY		(1U << 0)
+#define EXTENT_WRITEBACK	(1U << 1)
+#define EXTENT_UPTODATE		(1U << 2)
+#define EXTENT_LOCKED		(1U << 3)
+#define EXTENT_NEW		(1U << 4)
+#define EXTENT_DELALLOC		(1U << 5)
+#define EXTENT_DEFRAG		(1U << 6)
+#define EXTENT_DEFRAG_DONE	(1U << 7)
+#define EXTENT_BUFFER_FILLED	(1U << 8)
+#define EXTENT_CSUM		(1U << 9)
+#define EXTENT_BAD_TRANSID	(1U << 10)
+#define EXTENT_BUFFER_DUMMY	(1U << 11)
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+#define BLOCK_GROUP_DATA	(1U << 1)
+#define BLOCK_GROUP_METADATA	(1U << 2)
+#define BLOCK_GROUP_SYSTEM	(1U << 4)
+
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ *    single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+	return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
+
+struct btrfs_fs_info;
+
+struct extent_io_tree {
+	struct cache_tree state;
+	struct cache_tree cache;
+	u64 cache_size;
+};
+
+struct extent_state {
+	struct cache_extent cache_node;
+	u64 start;
+	u64 end;
+	int refs;
+	unsigned long state;
+	u64 xprivate;
+};
+
+struct extent_buffer {
+	struct cache_extent cache_node;
+	u64 start;
+	u32 len;
+	int refs;
+	u32 flags;
+	struct btrfs_fs_info *fs_info;
+	char *data;
+};
+
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+	eb->refs++;
+}
+
+void extent_io_tree_init(struct extent_io_tree *tree);
+void extent_io_tree_cleanup(struct extent_io_tree *tree);
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			  u64 *start_ret, u64 *end_ret, int bits);
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int filled);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+static inline int set_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+	eb->flags |= EXTENT_UPTODATE;
+	return 0;
+}
+
+static inline int clear_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+	eb->flags &= ~EXTENT_UPTODATE;
+	return 0;
+}
+
+static inline int extent_buffer_uptodate(struct extent_buffer *eb)
+{
+	if (!eb || IS_ERR(eb))
+		return 0;
+	if (eb->flags & EXTENT_UPTODATE)
+		return 1;
+	return 0;
+}
+
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *xprivate);
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+					 u64 bytenr, u32 blocksize);
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+					       u64 start);
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+					  u64 bytenr, u32 blocksize);
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+						u64 bytenr, u32 blocksize);
+void free_extent_buffer(struct extent_buffer *eb);
+int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part,
+			  u64 physical, struct extent_buffer *eb,
+			  unsigned long offset, unsigned long len);
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+			 unsigned long start, unsigned long len);
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+			unsigned long start, unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+			 unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+			   unsigned long nr);
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int clear_extent_buffer_dirty(struct extent_buffer *eb);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+				unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+			      unsigned long pos, unsigned long len);
+
+#endif
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
deleted file mode 100644
index 52a8ceaf0c8..00000000000
--- a/fs/btrfs/hash.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-#include <u-boot/crc.h>
-#include <asm/unaligned.h>
-
-static u32 btrfs_crc32c_table[256];
-
-void btrfs_hash_init(void)
-{
-	static int inited = 0;
-
-	if (!inited) {
-		crc32c_init(btrfs_crc32c_table, 0x82F63B78);
-		inited = 1;
-	}
-}
-
-u32 btrfs_crc32c(u32 crc, const void *data, size_t length)
-{
-	return crc32c_cal(crc, (const char *) data, length,
-			  btrfs_crc32c_table);
-}
-
-u32 btrfs_csum_data(char *data, u32 seed, size_t len)
-{
-	return btrfs_crc32c(seed, data, len);
-}
-
-void btrfs_csum_final(u32 crc, void *result)
-{
-	put_unaligned(cpu_to_le32(~crc), (u32 *)result);
-}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 991c2f68c3b..ff330280e02 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5,193 +5,187 @@
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
+#include <linux/kernel.h>
 #include <malloc.h>
+#include <memalign.h>
+#include "btrfs.h"
+#include "disk-io.h"
+#include "volumes.h"
 
-u64 btrfs_lookup_inode_ref(struct btrfs_root *root, u64 inr,
-			   struct btrfs_inode_ref *refp, char *name)
+/*
+ * Read the content of symlink inode @ino of @root, into @target.
+ * NOTE: @target will not be \0 termiated, caller should handle it properly.
+ *
+ * Return the number of read data.
+ * Return <0 for error.
+ */
+int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target)
 {
 	struct btrfs_path path;
-	struct btrfs_key *key;
-	struct btrfs_inode_ref *ref;
-	u64 res = -1ULL;
-
-	key = btrfs_search_tree_key_type(root, inr, BTRFS_INODE_REF_KEY,
-					       &path);
-
-	if (!key)
-		return -1ULL;
-
-	ref = btrfs_path_item_ptr(&path, struct btrfs_inode_ref);
-	btrfs_inode_ref_to_cpu(ref);
-
-	if (refp)
-		*refp = *ref;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	int ret;
 
-	if (name) {
-		if (ref->name_len > BTRFS_NAME_MAX) {
-			printf("%s: inode name too long: %u\n", __func__,
-			        ref->name_len);
-			goto out;
-		}
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+	btrfs_init_path(&path);
 
-		memcpy(name, ref + 1, ref->name_len);
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
 	}
-
-	res = key->offset;
-out:
-	btrfs_free_path(&path);
-	return res;
-}
-
-int btrfs_lookup_inode(const struct btrfs_root *root,
-		       struct btrfs_key *location,
-		       struct btrfs_inode_item *item,
-		       struct btrfs_root *new_root)
-{
-	struct btrfs_root tmp_root = *root;
-	struct btrfs_path path;
-	int res = -1;
-
-	if (location->type == BTRFS_ROOT_ITEM_KEY) {
-		if (btrfs_find_root(location->objectid, &tmp_root, NULL))
-			return -1;
-
-		location->objectid = tmp_root.root_dirid;
-		location->type = BTRFS_INODE_ITEM_KEY;
-		location->offset = 0;
+	fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+			    struct btrfs_file_extent_item);
+	if (btrfs_file_extent_type(path.nodes[0], fi) !=
+	    BTRFS_FILE_EXTENT_INLINE) {
+		ret = -EUCLEAN;
+		error("Extent for symlink %llu must be INLINE type!", ino);
+		goto out;
 	}
-
-	if (btrfs_search_tree(&tmp_root, location, &path))
-		return res;
-
-	if (btrfs_comp_keys(location, btrfs_path_leaf_key(&path)))
+	if (btrfs_file_extent_compression(path.nodes[0], fi) !=
+	    BTRFS_COMPRESS_NONE) {
+		ret = -EUCLEAN;
+		error("Extent for symlink %llu must not be compressed!", ino);
 		goto out;
-
-	if (item) {
-		*item = *btrfs_path_item_ptr(&path, struct btrfs_inode_item);
-		btrfs_inode_item_to_cpu(item);
 	}
-
-	if (new_root)
-		*new_root = tmp_root;
-
-	res = 0;
-
+	if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >=
+	    root->fs_info->sectorsize) {
+		ret = -EUCLEAN;
+		error("Symlink %llu extent data too large (%llu)!\n",
+			ino, btrfs_file_extent_ram_bytes(path.nodes[0], fi));
+		goto out;
+	}
+	read_extent_buffer(path.nodes[0], target,
+			btrfs_file_extent_inline_start(fi),
+			btrfs_file_extent_ram_bytes(path.nodes[0], fi));
+	ret = btrfs_file_extent_ram_bytes(path.nodes[0], fi);
 out:
-	btrfs_free_path(&path);
-	return res;
+	btrfs_release_path(&path);
+	return ret;
 }
 
-int btrfs_readlink(const struct btrfs_root *root, u64 inr, char *target)
+static int lookup_root_ref(struct btrfs_fs_info *fs_info,
+			   u64 rootid, u64 *root_ret, u64 *dir_ret)
 {
+	struct btrfs_root *root = fs_info->tree_root;
+	struct btrfs_root_ref *root_ref;
 	struct btrfs_path path;
 	struct btrfs_key key;
-	struct btrfs_file_extent_item *extent;
-	const char *data_ptr;
-	int res = -1;
-
-	key.objectid = inr;
-	key.type = BTRFS_EXTENT_DATA_KEY;
-	key.offset = 0;
-
-	if (btrfs_search_tree(root, &key, &path))
-		return -1;
-
-	if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)))
-		goto out;
-
-	extent = btrfs_path_item_ptr(&path, struct btrfs_file_extent_item);
-	if (extent->type != BTRFS_FILE_EXTENT_INLINE) {
-		printf("%s: Extent for symlink %llu not of INLINE type\n",
-		       __func__, inr);
+	int ret;
+
+	btrfs_init_path(&path);
+	key.objectid = rootid;
+	key.type = BTRFS_ROOT_BACKREF_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+	/* Should not happen */
+	if (ret == 0) {
+		ret = -EUCLEAN;
 		goto out;
 	}
-
-	btrfs_file_extent_item_to_cpu_inl(extent);
-
-	if (extent->compression != BTRFS_COMPRESS_NONE) {
-		printf("%s: Symlink %llu extent data compressed!\n", __func__,
-		       inr);
+	ret = btrfs_previous_item(root, &path, rootid, BTRFS_ROOT_BACKREF_KEY);
+	if (ret < 0)
 		goto out;
-	} else if (extent->encryption != 0) {
-		printf("%s: Symlink %llu extent data encrypted!\n", __func__,
-		       inr);
-		goto out;
-	} else if (extent->ram_bytes >= btrfs_info.sb.sectorsize) {
-		printf("%s: Symlink %llu extent data too long (%llu)!\n",
-		       __func__, inr, extent->ram_bytes);
+	if (ret > 0) {
+		ret = -ENOENT;
 		goto out;
 	}
-
-	data_ptr = (const char *) extent
-		   + offsetof(struct btrfs_file_extent_item, disk_bytenr);
-
-	memcpy(target, data_ptr, extent->ram_bytes);
-	target[extent->ram_bytes] = '\0';
-	res = 0;
+	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+	root_ref = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				  struct btrfs_root_ref);
+	*root_ret = key.offset;
+	*dir_ret = btrfs_root_ref_dirid(path.nodes[0], root_ref);
 out:
-	btrfs_free_path(&path);
-	return res;
+	btrfs_release_path(&path);
+	return ret;
 }
 
-/* inr must be a directory (for regular files with multiple hard links this
-   function returns only one of the parents of the file) */
-static u64 get_parent_inode(struct btrfs_root *root, u64 inr,
-			    struct btrfs_inode_item *inode_item)
+/*
+ * To get the parent inode of @ino of @root.
+ *
+ * @root_ret and @ino_ret will be filled.
+ *
+ * NOTE: This function is not reliable. It can only get one parent inode.
+ * The get the proper parent inode, we need a full VFS inodes stack to
+ * resolve properly.
+ */
+static int get_parent_inode(struct btrfs_root *root, u64 ino,
+			    struct btrfs_root **root_ret, u64 *ino_ret)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_path path;
 	struct btrfs_key key;
-	u64 res;
-
-	if (inr == BTRFS_FIRST_FREE_OBJECTID) {
-		if (root->objectid != btrfs_info.fs_root.objectid) {
-			u64 parent;
-			struct btrfs_root_ref ref;
-
-			parent = btrfs_lookup_root_ref(root->objectid, &ref,
-						       NULL);
-			if (parent == -1ULL)
-				return -1ULL;
+	int ret;
 
-			if (btrfs_find_root(parent, root, NULL))
-				return -1ULL;
+	if (ino == BTRFS_FIRST_FREE_OBJECTID) {
+		u64 parent_root = -1;
 
-			inr = ref.dirid;
+		/* It's top level already, no more parent */
+		if (root->root_key.objectid == BTRFS_FS_TREE_OBJECTID) {
+			*root_ret = fs_info->fs_root;
+			*ino_ret = BTRFS_FIRST_FREE_OBJECTID;
+			return 0;
 		}
 
-		if (inode_item) {
-			key.objectid = inr;
-			key.type = BTRFS_INODE_ITEM_KEY;
-			key.offset = 0;
+		ret = lookup_root_ref(fs_info, root->root_key.objectid,
+				      &parent_root, ino_ret);
+		if (ret < 0)
+			return ret;
 
-			if (btrfs_lookup_inode(root, &key, inode_item, NULL))
-				return -1ULL;
-		}
+		key.objectid = parent_root;
+		key.type = BTRFS_ROOT_ITEM_KEY;
+		key.offset = (u64)-1;
+		*root_ret = btrfs_read_fs_root(fs_info, &key);
+		if (IS_ERR(*root_ret))
+			return PTR_ERR(*root_ret);
 
-		return inr;
+		return 0;
 	}
 
-	res = btrfs_lookup_inode_ref(root, inr, NULL, NULL);
-	if (res == -1ULL)
-		return -1ULL;
-
-	if (inode_item) {
-		key.objectid = res;
-		key.type = BTRFS_INODE_ITEM_KEY;
-		key.offset = 0;
-
-		if (btrfs_lookup_inode(root, &key, inode_item, NULL))
-			return -1ULL;
+	btrfs_init_path(&path);
+	key.objectid = ino;
+	key.type = BTRFS_INODE_REF_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+	/* Should not happen */
+	if (ret == 0) {
+		ret = -EUCLEAN;
+		goto out;
 	}
-
-	return res;
+	ret = btrfs_previous_item(root, &path, ino, BTRFS_INODE_REF_KEY);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+	*root_ret = root;
+	*ino_ret = key.offset;
+out:
+	btrfs_release_path(&path);
+	return ret;
 }
 
 static inline int next_length(const char *path)
 {
 	int res = 0;
-	while (*path != '\0' && *path != '/' && res <= BTRFS_NAME_LEN)
-		++res, ++path;
+	while (*path != '\0' && *path != '/') {
+		++res;
+		++path;
+		if (res > BTRFS_NAME_LEN)
+			break;
+	}
 	return res;
 }
 
@@ -209,175 +203,549 @@ static inline const char *skip_current_directories(const char *cur)
 	return cur;
 }
 
-u64 btrfs_lookup_path(struct btrfs_root *root, u64 inr, const char *path,
-		      u8 *type_p, struct btrfs_inode_item *inode_item_p,
-		      int symlink_limit)
+/*
+ * Resolve one filename of @ino of @root.
+ *
+ * key_ret:	The child key (either INODE_ITEM or ROOT_ITEM type)
+ * type_ret:	BTRFS_FT_* of the child inode.
+ *
+ * Return 0 with above members filled.
+ * Return <0 for error.
+ */
+static int resolve_one_filename(struct btrfs_root *root, u64 ino,
+				const char *name, int namelen,
+				struct btrfs_key *key_ret, u8 *type_ret)
 {
-	struct btrfs_dir_item item;
-	struct btrfs_inode_item inode_item;
-	u8 type = BTRFS_FT_DIR;
-	int len, have_inode = 0;
-	const char *cur = path;
+	struct btrfs_dir_item *dir_item;
+	struct btrfs_path path;
+	int ret = 0;
+
+	btrfs_init_path(&path);
+
+	dir_item = btrfs_lookup_dir_item(NULL, root, &path, ino, name,
+					 namelen, 0);
+	if (IS_ERR(dir_item)) {
+		ret = PTR_ERR(dir_item);
+		goto out;
+	}
 
+	btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
+	*type_ret = btrfs_dir_type(path.nodes[0], dir_item);
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Resolve a full path @filename. The start point is @ino of @root.
+ *
+ * The result will be filled into @root_ret, @ino_ret and @type_ret.
+ */
+int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
+			struct btrfs_root **root_ret, u64 *ino_ret,
+			u8 *type_ret, int symlink_limit)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *next_root;
+	struct btrfs_key key;
+	const char *cur = filename;
+	u64 next_ino;
+	u8 next_type;
+	u8 type;
+	int len;
+	int ret = 0;
+
+	/* If the path is absolute path, also search from fs root */
 	if (*cur == '/') {
-		++cur;
-		inr = root->root_dirid;
+		root = fs_info->fs_root;
+		ino = btrfs_root_dirid(&root->root_item);
+		type = BTRFS_FT_DIR;
 	}
 
-	do {
+	while (*cur != '\0') {
 		cur = skip_current_directories(cur);
 
 		len = next_length(cur);
 		if (len > BTRFS_NAME_LEN) {
-			printf("%s: Name too long at \"%.*s\"\n", __func__,
+			error("%s: Name too long at \"%.*s\"", __func__,
 			       BTRFS_NAME_LEN, cur);
-			return -1ULL;
+			return -ENAMETOOLONG;
 		}
 
 		if (len == 1 && cur[0] == '.')
 			break;
 
 		if (len == 2 && cur[0] == '.' && cur[1] == '.') {
-			cur += 2;
-			inr = get_parent_inode(root, inr, &inode_item);
-			if (inr == -1ULL)
-				return -1ULL;
-
-			type = BTRFS_FT_DIR;
-			continue;
+			/* Go one level up */
+			ret = get_parent_inode(root, ino, &next_root, &next_ino);
+			if (ret < 0)
+				return ret;
+			root = next_root;
+			ino = next_ino;
+			goto next;
 		}
 
 		if (!*cur)
 			break;
-		
-		if (btrfs_lookup_dir_item(root, inr, cur, len, &item))
-			return -1ULL;
 
-		type = item.type;
-		have_inode = 1;
-		if (btrfs_lookup_inode(root, &item.location, &inode_item, root))
-			return -1ULL;
+		ret = resolve_one_filename(root, ino, cur, len, &key, &type);
+		if (ret < 0)
+			return ret;
+
+		if (key.type == BTRFS_ROOT_ITEM_KEY) {
+			/* Child inode is a subvolume */
+
+			next_root = btrfs_read_fs_root(fs_info, &key);
+			if (IS_ERR(next_root))
+				return PTR_ERR(next_root);
+			root = next_root;
+			ino = btrfs_root_dirid(&root->root_item);
+		} else if (type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
+			/* Child inode is a symlink */
 
-		if (item.type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
 			char *target;
 
-			if (!symlink_limit) {
-				printf("%s: Too much symlinks!\n", __func__);
-				return -1ULL;
+			if (symlink_limit == 0) {
+				error("%s: Too much symlinks!", __func__);
+				return -EMLINK;
 			}
-
-			target = malloc(min(inode_item.size + 1,
-					    (u64) btrfs_info.sb.sectorsize));
+			target = malloc(fs_info->sectorsize);
 			if (!target)
-				return -1ULL;
-
-			if (btrfs_readlink(root, item.location.objectid,
-					   target)) {
+				return -ENOMEM;
+			ret = btrfs_readlink(root, key.objectid, target);
+			if (ret < 0) {
 				free(target);
-				return -1ULL;
+				return ret;
 			}
+			target[ret] = '\0';
+
+			ret = btrfs_lookup_path(root, ino, target, &next_root,
+						&next_ino, &next_type,
+						symlink_limit);
+			if (ret < 0)
+				return ret;
+			root = next_root;
+			ino = next_ino;
+			type = next_type;
+		} else {
+			/* Child inode is an inode */
+			ino = key.objectid;
+		}
+next:
+		cur += len;
+	}
 
-			inr = btrfs_lookup_path(root, inr, target, &type,
-						&inode_item, symlink_limit - 1);
+	if (!ret) {
+		*root_ret = root;
+		*ino_ret = ino;
+		*type_ret = type;
+	}
 
-			free(target);
+	return ret;
+}
 
-			if (inr == -1ULL)
-				return -1ULL;
-		} else if (item.type != BTRFS_FT_DIR && cur[len]) {
-			printf("%s: \"%.*s\" not a directory\n", __func__,
-			       (int) (cur - path + len), path);
-			return -1ULL;
-		} else {
-			inr = item.location.objectid;
-		}
+/*
+ * Read out inline extent.
+ *
+ * Since inline extent should only exist for offset 0, no need for extra
+ * parameters.
+ * Truncating should be handled by the caller.
+ *
+ * Return the number of bytes read.
+ * Return <0 for error.
+ */
+int btrfs_read_extent_inline(struct btrfs_path *path,
+			     struct btrfs_file_extent_item *fi, char *dest)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	int slot = path->slots[0];
+	char *cbuf = NULL;
+	char *dbuf = NULL;
+	u32 csize;
+	u32 dsize;
+	int ret;
+
+	csize = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(slot));
+	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+		/* Uncompressed, just read it out */
+		read_extent_buffer(leaf, dest,
+				btrfs_file_extent_inline_start(fi),
+				csize);
+		return csize;
+	}
 
-		cur += len;
-	} while (*cur);
+	/* Compressed extent, prepare the compressed and data buffer */
+	dsize = btrfs_file_extent_ram_bytes(leaf, fi);
+	cbuf = malloc(csize);
+	dbuf = malloc(dsize);
+	if (!cbuf || !dbuf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	read_extent_buffer(leaf, cbuf, btrfs_file_extent_inline_start(fi),
+			   csize);
+	ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi),
+			       cbuf, csize, dbuf, dsize);
+	if (ret < 0 || ret != dsize) {
+		ret = -EIO;
+		goto out;
+	}
+	memcpy(dest, dbuf, dsize);
+	ret = dsize;
+out:
+	free(cbuf);
+	free(dbuf);
+	return ret;
+}
 
-	if (type_p)
-		*type_p = type;
+/*
+ * Read out regular extent.
+ *
+ * Truncating should be handled by the caller.
+ *
+ * @offset and @len should not cross the extent boundary.
+ * Return the number of bytes read.
+ * Return <0 for error.
+ */
+int btrfs_read_extent_reg(struct btrfs_path *path,
+			  struct btrfs_file_extent_item *fi, u64 offset,
+			  int len, char *dest)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
+	struct btrfs_key key;
+	u64 extent_num_bytes;
+	u64 disk_bytenr;
+	u64 read;
+	char *cbuf = NULL;
+	char *dbuf = NULL;
+	u32 csize;
+	u32 dsize;
+	bool finished = false;
+	int num_copies;
+	int i;
+	int slot = path->slots[0];
+	int ret;
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	extent_num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+	ASSERT(IS_ALIGNED(offset, fs_info->sectorsize) &&
+	       IS_ALIGNED(len, fs_info->sectorsize));
+	ASSERT(offset >= key.offset &&
+	       offset + len <= key.offset + extent_num_bytes);
+
+	/* Preallocated or hole , fill @dest with zero */
+	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_PREALLOC ||
+	    btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
+		memset(dest, 0, len);
+		return len;
+	}
 
-	if (inode_item_p) {
-		if (!have_inode) {
-			struct btrfs_key key;
+	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+		u64 logical;
 
-			key.objectid = inr;
-			key.type = BTRFS_INODE_ITEM_KEY;
-			key.offset = 0;
+		logical = btrfs_file_extent_disk_bytenr(leaf, fi) +
+			  btrfs_file_extent_offset(leaf, fi) +
+			  offset - key.offset;
+		read = len;
 
-			if (btrfs_lookup_inode(root, &key, &inode_item, NULL))
-				return -1ULL;
+		num_copies = btrfs_num_copies(fs_info, logical, len);
+		for (i = 1; i <= num_copies; i++) {
+			ret = read_extent_data(fs_info, dest, logical, &read, i);
+			if (ret < 0 || read != len)
+				continue;
+			finished = true;
+			break;
 		}
+		if (!finished)
+			return -EIO;
+		return len;
+	}
+
+	csize = btrfs_file_extent_disk_num_bytes(leaf, fi);
+	dsize = btrfs_file_extent_ram_bytes(leaf, fi);
+	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+	num_copies = btrfs_num_copies(fs_info, disk_bytenr, csize);
 
-		*inode_item_p = inode_item;
+	cbuf = malloc_cache_aligned(csize);
+	dbuf = malloc_cache_aligned(dsize);
+	if (!cbuf || !dbuf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	/* For compressed extent, we must read the whole on-disk extent */
+	for (i = 1; i <= num_copies; i++) {
+		read = csize;
+		ret = read_extent_data(fs_info, cbuf, disk_bytenr,
+				       &read, i);
+		if (ret < 0 || read != csize)
+			continue;
+		finished = true;
+		break;
+	}
+	if (!finished) {
+		ret = -EIO;
+		goto out;
 	}
 
-	return inr;
+	ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), cbuf,
+			       csize, dbuf, dsize);
+	if (ret != dsize) {
+		ret = -EIO;
+		goto out;
+	}
+	/* Then copy the needed part */
+	memcpy(dest, dbuf + btrfs_file_extent_offset(leaf, fi), len);
+	ret = len;
+out:
+	free(cbuf);
+	free(dbuf);
+	return ret;
 }
 
-u64 btrfs_file_read(const struct btrfs_root *root, u64 inr, u64 offset,
-		    u64 size, char *buf)
+/*
+ * Get the first file extent that covers bytenr @file_offset.
+ *
+ * @file_offset must be aligned to sectorsize.
+ *
+ * return 0 for found, and path points to the file extent.
+ * return >0 for not found, and fill @next_offset.
+ * @next_offset can be 0 if there is no next file extent.
+ * return <0 for error.
+ */
+static int lookup_data_extent(struct btrfs_root *root, struct btrfs_path *path,
+			      u64 ino, u64 file_offset, u64 *next_offset)
 {
-	struct btrfs_path path;
 	struct btrfs_key key;
-	struct btrfs_file_extent_item *extent;
-	int res = 0;
-	u64 rd, rd_all = -1ULL;
+	struct btrfs_file_extent_item *fi;
+	u8 extent_type;
+	int ret = 0;
 
-	key.objectid = inr;
+	ASSERT(IS_ALIGNED(file_offset, root->fs_info->sectorsize));
+	key.objectid = ino;
 	key.type = BTRFS_EXTENT_DATA_KEY;
-	key.offset = offset;
-
-	if (btrfs_search_tree(root, &key, &path))
-		return -1ULL;
-
-	if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)) < 0) {
-		if (btrfs_prev_slot(&path))
-			goto out;
+	key.offset = file_offset;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	/* Error or we're already at the file extent */
+	if (ret <= 0)
+		return ret;
+	if (ret > 0) {
+		/* Check previous file extent */
+		ret = btrfs_previous_item(root, path, ino,
+					  BTRFS_EXTENT_DATA_KEY);
+		if (ret < 0)
+			return ret;
+		if (ret > 0)
+			goto check_next;
+	}
+	/* Now the key.offset must be smaller than @file_offset */
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	if (key.objectid != ino ||
+	    key.type != BTRFS_EXTENT_DATA_KEY)
+		goto check_next;
+
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	extent_type = btrfs_file_extent_type(path->nodes[0], fi);
+	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+		if (file_offset == 0)
+			return 0;
+		/* Inline extent should be the only extent, no next extent. */
+		*next_offset = 0;
+		return 1;
+	}
 
-		if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
-			goto out;
+	/* This file extent covers @file_offset */
+	if (key.offset <= file_offset && key.offset +
+	    btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
+		return 0;
+check_next:
+	ret = btrfs_next_item(root, path);
+	if (ret < 0)
+		return ret;
+	if (ret > 0) {
+		*next_offset = 0;
+		return 1;
 	}
 
-	rd_all = 0;
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	/* Next next data extent */
+	if (key.objectid != ino ||
+	    key.type != BTRFS_EXTENT_DATA_KEY) {
+		*next_offset = 0;
+		return 1;
+	}
+	/* Current file extent already beyond @file_offset */
+	if (key.offset > file_offset) {
+		*next_offset = key.offset;
+		return 1;
+	}
+	/* This file extent covers @file_offset */
+	if (key.offset <= file_offset && key.offset +
+	    btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
+		return 0;
+	/* This file extent ends before @file_offset, check next */
+	ret = btrfs_next_item(root, path);
+	if (ret < 0)
+		return ret;
+	if (ret > 0) {
+		*next_offset = 0;
+		return 1;
+	}
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	if (key.type != BTRFS_EXTENT_DATA_KEY || key.objectid != ino) {
+		*next_offset = 0;
+		return 1;
+	}
+	*next_offset = key.offset;
+	return 1;
+}
 
-	do {
-		if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
-			break;
+static int read_and_truncate_page(struct btrfs_path *path,
+				  struct btrfs_file_extent_item *fi,
+				  int start, int len, char *dest)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
+	u64 aligned_start = round_down(start, fs_info->sectorsize);
+	u8 extent_type;
+	char *buf;
+	int page_off = start - aligned_start;
+	int page_len = fs_info->sectorsize - page_off;
+	int ret;
+
+	ASSERT(start + len <= aligned_start + fs_info->sectorsize);
+	buf = malloc_cache_aligned(fs_info->sectorsize);
+	if (!buf)
+		return -ENOMEM;
+
+	extent_type = btrfs_file_extent_type(leaf, fi);
+	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+		ret = btrfs_read_extent_inline(path, fi, buf);
+		memcpy(dest, buf + page_off, min(page_len, ret));
+		free(buf);
+		return len;
+	}
 
-		extent = btrfs_path_item_ptr(&path,
-					     struct btrfs_file_extent_item);
+	ret = btrfs_read_extent_reg(path, fi,
+			round_down(start, fs_info->sectorsize),
+			fs_info->sectorsize, buf);
+	if (ret < 0) {
+		free(buf);
+		return ret;
+	}
+	memcpy(dest, buf + page_off, page_len);
+	free(buf);
+	return len;
+}
 
-		if (extent->type == BTRFS_FILE_EXTENT_INLINE) {
-			btrfs_file_extent_item_to_cpu_inl(extent);
-			rd = btrfs_read_extent_inline(&path, extent, offset,
-						      size, buf);
+int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
+		    char *dest)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	u64 aligned_start = round_down(file_offset, fs_info->sectorsize);
+	u64 aligned_end = round_down(file_offset + len, fs_info->sectorsize);
+	u64 next_offset;
+	u64 cur = aligned_start;
+	int ret = 0;
+
+	btrfs_init_path(&path);
+
+	/* Set the whole dest all zero, so we won't need to bother holes */
+	memset(dest, 0, len);
+
+	/* Read out the leading unaligned part */
+	if (aligned_start != file_offset) {
+		ret = lookup_data_extent(root, &path, ino, aligned_start,
+					 &next_offset);
+		if (ret < 0)
+			goto out;
+		if (ret == 0) {
+			/* Read the unaligned part out*/
+			fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+					struct btrfs_file_extent_item);
+			ret = read_and_truncate_page(&path, fi, file_offset,
+					round_up(file_offset, fs_info->sectorsize) -
+					file_offset, dest);
+			if (ret < 0)
+				goto out;
+			cur += fs_info->sectorsize;
 		} else {
-			btrfs_file_extent_item_to_cpu(extent);
-			rd = btrfs_read_extent_reg(&path, extent, offset, size,
-						   buf);
+			/* The whole file is a hole */
+			if (!next_offset) {
+				memset(dest, 0, len);
+				return len;
+			}
+			cur = next_offset;
 		}
+	}
+
+	/* Read the aligned part */
+	while (cur < aligned_end) {
+		u64 extent_num_bytes;
+		u8 type;
 
-		if (rd == -1ULL) {
-			printf("%s: Error reading extent\n", __func__);
-			rd_all = -1;
+		btrfs_release_path(&path);
+		ret = lookup_data_extent(root, &path, ino, cur, &next_offset);
+		if (ret < 0)
 			goto out;
+		if (ret > 0) {
+			/* No next, direct exit */
+			if (!next_offset) {
+				ret = 0;
+				goto out;
+			}
+		}
+		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				    struct btrfs_file_extent_item);
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		type = btrfs_file_extent_type(path.nodes[0], fi);
+		if (type == BTRFS_FILE_EXTENT_INLINE) {
+			ret = btrfs_read_extent_inline(&path, fi, dest);
+			goto out;
+		}
+		/* Skip holes, as we have zeroed the dest */
+		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+		    btrfs_file_extent_disk_bytenr(path.nodes[0], fi) == 0) {
+			cur = key.offset + btrfs_file_extent_num_bytes(
+					path.nodes[0], fi);
+			continue;
 		}
 
-		offset = 0;
-		buf += rd;
-		rd_all += rd;
-		size -= rd;
-
-		if (!size)
-			break;
-	} while (!(res = btrfs_next_slot(&path)));
-
-	if (res)
-		return -1ULL;
+		/* Read the remaining part of the extent */
+		extent_num_bytes = btrfs_file_extent_num_bytes(path.nodes[0],
+							       fi);
+		ret = btrfs_read_extent_reg(&path, fi, cur,
+				min(extent_num_bytes, aligned_end - cur),
+				dest + cur - file_offset);
+		if (ret < 0)
+			goto out;
+		cur += min(extent_num_bytes, aligned_end - cur);
+	}
 
+	/* Read the tailing unaligned part*/
+	if (file_offset + len != aligned_end) {
+		btrfs_release_path(&path);
+		ret = lookup_data_extent(root, &path, ino, aligned_end,
+					 &next_offset);
+		/* <0 is error, >0 means no extent */
+		if (ret)
+			goto out;
+		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				    struct btrfs_file_extent_item);
+		ret = read_and_truncate_page(&path, fi, aligned_end,
+				file_offset + len - aligned_end,
+				dest + aligned_end - file_offset);
+	}
 out:
-	btrfs_free_path(&path);
-	return rd_all;
+	btrfs_release_path(&path);
+	if (ret < 0)
+		return ret;
+	return len;
 }
diff --git a/fs/btrfs/kernel-shared/btrfs_tree.h b/fs/btrfs/kernel-shared/btrfs_tree.h
new file mode 100644
index 00000000000..6a76d1e456f
--- /dev/null
+++ b/fs/btrfs/kernel-shared/btrfs_tree.h
@@ -0,0 +1,1333 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copied from kernel/include/uapi/linux/btrfs_btree.h.
+ *
+ * Only modified the header.
+ */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __BTRFS_TREE_H__
+#define __BTRFS_TREE_H__
+
+#include <linux/types.h>
+
+#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+
+/*
+ * The max metadata block size (node size).
+ *
+ * This limit is somewhat artificial. The memmove and tree block locking cost
+ * go up with larger node size.
+ */
+#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
+
+/*
+ * We can actually store much bigger names, but lets not confuse the rest
+ * of linux.
+ *
+ * btrfs_dir_item::name_len follows this limitation.
+ */
+#define BTRFS_NAME_LEN 255
+
+/*
+ * Objectids start from here.
+ *
+ * Check btrfs_disk_key for the meaning of objectids.
+ */
+
+/*
+ * Root tree holds pointers to all of the tree roots.
+ * Without special mention, the root tree contains the root bytenr of all other
+ * trees, except the chunk tree and the log tree.
+ *
+ * The super block contains the root bytenr of this tree.
+ */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/*
+ * Extent tree stores information about which extents are in use, and backrefs
+ * for each extent.
+ */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * Chunk tree stores btrfs logical address -> physical address mapping.
+ *
+ * The super block contains part of chunk tree for bootstrap, and contains
+ * the root bytenr of this tree.
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * Device tree stores info about which areas of a given device are in use,
+ * and physical address -> btrfs logical address mapping.
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* The fs tree is the first subvolume tree, storing files and directories. */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* Shows the directory objectid inside the root tree. */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* Csum tree holds checksums of all the data extents. */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
+/* Quota tree holds quota configuration and tracking. */
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
+/* UUID tree stores items that use the BTRFS_UUID_KEY* types. */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
+/* Free space cache tree (v2 space cache) tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* Indicates device stats in the device tree. */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
+/* For storing balance parameters in the root tree. */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* Orhpan objectid for tracking unlinked/truncated files. */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* Does write ahead logging to speed up fsyncs. */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* For space balancing. */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/* Extent checksums, shared between the csum tree and log trees. */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache (v1 space cache). */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/* The inode number assigned to the special inode for storing free ino cache. */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* Dummy objectid represents multiple objectids. */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/* All files have objectids in this range. */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * The device items go into the chunk tree.
+ *
+ * The key is in the form
+ * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY,  <device_id>)
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
+
+/*
+ * Types start from here.
+ *
+ * Check btrfs_disk_key for details about types.
+ */
+
+/*
+ * Inode items have the data typically returned from stat and store other
+ * info about object characteristics.
+ *
+ * There is one for every file and dir in the FS.
+ */
+#define BTRFS_INODE_ITEM_KEY		1
+/* reserve 2-11 close to the inode for later flexibility */
+#define BTRFS_INODE_REF_KEY		12
+#define BTRFS_INODE_EXTREF_KEY		13
+#define BTRFS_XATTR_ITEM_KEY		24
+#define BTRFS_ORPHAN_ITEM_KEY		48
+
+/*
+ * Dir items are the name -> inode pointers in a directory.
+ *
+ * There is one for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY  60
+#define BTRFS_DIR_LOG_INDEX_KEY 72
+#define BTRFS_DIR_ITEM_KEY	84
+#define BTRFS_DIR_INDEX_KEY	96
+
+/* Stores info (position, size ...) about a data extent of a file */
+#define BTRFS_EXTENT_DATA_KEY	108
+
+/*
+ * Extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
+ */
+#define BTRFS_EXTENT_CSUM_KEY	128
+
+/*
+ * Root items point to tree roots.
+ *
+ * They are typically in the root tree used by the super block to find all the
+ * other trees.
+ */
+#define BTRFS_ROOT_ITEM_KEY	132
+
+/*
+ * Root backrefs tie subvols and snapshots to the directory entries that
+ * reference them.
+ */
+#define BTRFS_ROOT_BACKREF_KEY	144
+
+/*
+ * Root refs make a fast index for listing all of the snapshots and
+ * subvolumes referenced by a given root.  They point directly to the
+ * directory item in the root that references the subvol.
+ */
+#define BTRFS_ROOT_REF_KEY	156
+
+/*
+ * Extent items are in the extent tree.
+ *
+ * These record which blocks are used, and how many references there are.
+ */
+#define BTRFS_EXTENT_ITEM_KEY	168
+
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY	169
+
+#define BTRFS_TREE_BLOCK_REF_KEY	176
+
+#define BTRFS_EXTENT_DATA_REF_KEY	178
+
+#define BTRFS_EXTENT_REF_V0_KEY		180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY	182
+
+#define BTRFS_SHARED_DATA_REF_KEY	184
+
+/*
+ * Block groups give us hints into the extent allocation trees.
+ *
+ * Stores how many free space there is in a block group.
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents.
+ *
+ * A free space bitmap is keyed on (start, FREE_SPACE_BITMAP, length).
+ * The corresponding item is a bitmap with (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
+#define BTRFS_DEV_EXTENT_KEY	204
+#define BTRFS_DEV_ITEM_KEY	216
+#define BTRFS_CHUNK_ITEM_KEY	228
+
+/*
+ * Records the overall state of the qgroups.
+ *
+ * There's only one instance of this key present,
+ * (0, BTRFS_QGROUP_STATUS_KEY, 0)
+ */
+#define BTRFS_QGROUP_STATUS_KEY         240
+/*
+ * Records the currently used space of the qgroup.
+ *
+ * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_INFO_KEY           242
+
+/*
+ * Contains the user configured limits for the qgroup.
+ *
+ * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_LIMIT_KEY          244
+
+/*
+ * Records the child-parent relationship of qgroups. For
+ * each relation, 2 keys are present:
+ * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
+ * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
+ */
+#define BTRFS_QGROUP_RELATION_KEY       246
+
+/* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */
+#define BTRFS_BALANCE_ITEM_KEY	248
+
+/*
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
+ */
+#define BTRFS_TEMPORARY_ITEM_KEY	248
+
+/* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY */
+#define BTRFS_DEV_STATS_KEY		249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ *   stats
+ *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY	249
+
+/*
+ * Persistently stores the device replace state in the device tree.
+ *
+ * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */
+#define BTRFS_DEV_REPLACE_KEY	250
+
+/*
+ * Stores items that allow to quickly map UUIDs to something else.
+ *
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#define BTRFS_UUID_KEY_SUBVOL	251	/* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL	252	/* for UUIDs assigned to
+						 * received subvols */
+
+/*
+ * String items are for debugging.
+ *
+ * They just store a short string of data in the FS.
+ */
+#define BTRFS_STRING_ITEM_KEY	253
+
+
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
+/* Csum types */
+enum btrfs_csum_type {
+	BTRFS_CSUM_TYPE_CRC32	= 0,
+	BTRFS_CSUM_TYPE_XXHASH	= 1,
+	BTRFS_CSUM_TYPE_SHA256	= 2,
+	BTRFS_CSUM_TYPE_BLAKE2	= 3,
+};
+
+/*
+ * Flags definitions for directory entry item type.
+ *
+ * Used by:
+ * struct btrfs_dir_item.type
+ *
+ * Values 0..7 must match common file type values in fs_types.h.
+ */
+#define BTRFS_FT_UNKNOWN	0
+#define BTRFS_FT_REG_FILE	1
+#define BTRFS_FT_DIR		2
+#define BTRFS_FT_CHRDEV		3
+#define BTRFS_FT_BLKDEV		4
+#define BTRFS_FT_FIFO		5
+#define BTRFS_FT_SOCK		6
+#define BTRFS_FT_SYMLINK	7
+#define BTRFS_FT_XATTR		8
+#define BTRFS_FT_MAX		9
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+
+/*
+ * The key defines the order in the tree, and so it also defines (optimal)
+ * block layout.
+ *
+ * Objectid and offset are interpreted based on type.
+ * While normally for objectid, it either represents a root number, or an
+ * inode number.
+ *
+ * Type tells us things about the object, and is a kind of stream selector.
+ * Check the following URL for full references about btrfs_disk_key/btrfs_key:
+ * https://btrfs.wiki.kernel.org/index.php/Btree_Items
+ *
+ * btrfs_disk_key is in disk byte order.  struct btrfs_key is always
+ * in cpu native order.  Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+	__le64 objectid;
+	__u8 type;
+	__le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+	__u64 objectid;
+	__u8 type;
+	__u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_dev_item {
+	/* The internal btrfs device id */
+	__le64 devid;
+
+	/* Size of the device */
+	__le64 total_bytes;
+
+	/* Bytes used */
+	__le64 bytes_used;
+
+	/* Optimal io alignment for this device */
+	__le32 io_align;
+
+	/* Optimal io width for this device */
+	__le32 io_width;
+
+	/* Minimal io size for this device */
+	__le32 sector_size;
+
+	/* Type and info about this device */
+	__le64 type;
+
+	/* Expected generation for this device */
+	__le64 generation;
+
+	/*
+	 * Starting byte of this partition on the device,
+	 * to allow for stripe alignment in the future.
+	 */
+	__le64 start_offset;
+
+	/* Grouping information for allocation decisions */
+	__le32 dev_group;
+
+	/* Optimal seek speed 0-100 where 100 is fastest */
+	__u8 seek_speed;
+
+	/* Optimal bandwidth 0-100 where 100 is fastest */
+	__u8 bandwidth;
+
+	/* Btrfs generated uuid for this device */
+	__u8 uuid[BTRFS_UUID_SIZE];
+
+	/* UUID of FS who owns this device */
+	__u8 fsid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+	__le64 devid;
+	__le64 offset;
+	__u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+	/* Size of this chunk in bytes */
+	__le64 length;
+
+	/* Objectid of the root referencing this chunk */
+	__le64 owner;
+
+	__le64 stripe_len;
+	__le64 type;
+
+	/* Optimal io alignment for this chunk */
+	__le32 io_align;
+
+	/* Optimal io width for this chunk */
+	__le32 io_width;
+
+	/* Minimal io size for this chunk */
+	__le32 sector_size;
+
+	/*
+	 * 2^16 stripes is quite a lot, a second limit is the size of a single
+	 * item in the btree.
+	 */
+	__le16 num_stripes;
+
+	/* Sub stripes only matter for raid10 */
+	__le16 sub_stripes;
+	struct btrfs_stripe stripe;
+	/* additional stripes go here */
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_EXTENT	1
+#define BTRFS_FREE_SPACE_BITMAP	2
+
+struct btrfs_free_space_entry {
+	__le64 offset;
+	__le64 bytes;
+	__u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+	struct btrfs_disk_key location;
+	__le64 generation;
+	__le64 num_entries;
+	__le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
+#define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
+
+/* Super block flags */
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR		(1ULL << 2)
+
+#define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2	(1ULL << 34)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID	(1ULL << 35)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
+
+
+/*
+ * Items in the extent tree are used to record the objectid of the
+ * owner of the block and the number of references.
+ */
+struct btrfs_extent_item {
+	__le64 refs;
+	__le64 generation;
+	__le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
+	__le32 refs;
+} __attribute__ ((__packed__));
+
+
+#define BTRFS_EXTENT_FLAG_DATA		(1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK	(1ULL << 1)
+
+/* Use full backrefs for extent pointers in the block */
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
+
+/*
+ * This flag is only used internally by scrub and may be changed at any time
+ * it is only declared here to avoid collisions.
+ */
+#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
+
+struct btrfs_tree_block_info {
+	struct btrfs_disk_key key;
+	__u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+	__le64 root;
+	__le64 objectid;
+	__le64 offset;
+	__le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+	__le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+	__u8 type;
+	__le64 offset;
+} __attribute__ ((__packed__));
+
+/* Old style backrefs item */
+struct btrfs_extent_ref_v0 {
+	__le64 root;
+	__le64 generation;
+	__le64 objectid;
+	__le32 count;
+} __attribute__ ((__packed__));
+
+
+/* Dev extents record used space on individual devices.
+ *
+ * The owner field points back to the chunk allocation mapping tree that
+ * allocated the extent.
+ * The chunk tree uuid field is a way to double check the owner.
+ */
+struct btrfs_dev_extent {
+	__le64 chunk_tree;
+	__le64 chunk_objectid;
+	__le64 chunk_offset;
+	__le64 length;
+	__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+	__le64 index;
+	__le16 name_len;
+	/* Name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_extref {
+	__le64 parent_objectid;
+	__le64 index;
+	__le16 name_len;
+	__u8   name[0];
+	/* Name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+	__le64 sec;
+	__le32 nsec;
+} __attribute__ ((__packed__));
+
+/* Inode flags */
+#define BTRFS_INODE_NODATASUM		(1 << 0)
+#define BTRFS_INODE_NODATACOW		(1 << 1)
+#define BTRFS_INODE_READONLY		(1 << 2)
+#define BTRFS_INODE_NOCOMPRESS		(1 << 3)
+#define BTRFS_INODE_PREALLOC		(1 << 4)
+#define BTRFS_INODE_SYNC		(1 << 5)
+#define BTRFS_INODE_IMMUTABLE		(1 << 6)
+#define BTRFS_INODE_APPEND		(1 << 7)
+#define BTRFS_INODE_NODUMP		(1 << 8)
+#define BTRFS_INODE_NOATIME		(1 << 9)
+#define BTRFS_INODE_DIRSYNC		(1 << 10)
+#define BTRFS_INODE_COMPRESS		(1 << 11)
+
+#define BTRFS_INODE_ROOT_ITEM_INIT	(1 << 31)
+
+#define BTRFS_INODE_FLAG_MASK						\
+	(BTRFS_INODE_NODATASUM |					\
+	 BTRFS_INODE_NODATACOW |					\
+	 BTRFS_INODE_READONLY |						\
+	 BTRFS_INODE_NOCOMPRESS |					\
+	 BTRFS_INODE_PREALLOC |						\
+	 BTRFS_INODE_SYNC |						\
+	 BTRFS_INODE_IMMUTABLE |					\
+	 BTRFS_INODE_APPEND |						\
+	 BTRFS_INODE_NODUMP |						\
+	 BTRFS_INODE_NOATIME |						\
+	 BTRFS_INODE_DIRSYNC |						\
+	 BTRFS_INODE_COMPRESS |						\
+	 BTRFS_INODE_ROOT_ITEM_INIT)
+
+struct btrfs_inode_item {
+	/* Nfs style generation number */
+	__le64 generation;
+	/* Transid that last touched this inode */
+	__le64 transid;
+	__le64 size;
+	__le64 nbytes;
+	__le64 block_group;
+	__le32 nlink;
+	__le32 uid;
+	__le32 gid;
+	__le32 mode;
+	__le64 rdev;
+	__le64 flags;
+
+	/* Modification sequence number for NFS */
+	__le64 sequence;
+
+	/*
+	 * A little future expansion, for more than this we can just grow the
+	 * inode item and version it
+	 */
+	__le64 reserved[4];
+	struct btrfs_timespec atime;
+	struct btrfs_timespec ctime;
+	struct btrfs_timespec mtime;
+	struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+	__le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+	struct btrfs_disk_key location;
+	__le64 transid;
+	__le16 data_len;
+	__le16 name_len;
+	__u8 type;
+} __attribute__ ((__packed__));
+
+#define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)
+
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD		(1ULL << 48)
+
+struct btrfs_root_item {
+	struct btrfs_inode_item inode;
+	__le64 generation;
+	__le64 root_dirid;
+	__le64 bytenr;
+	__le64 byte_limit;
+	__le64 bytes_used;
+	__le64 last_snapshot;
+	__le64 flags;
+	__le32 refs;
+	struct btrfs_disk_key drop_progress;
+	__u8 drop_level;
+	__u8 level;
+
+	/*
+	 * The following fields appear after subvol_uuids+subvol_times
+	 * were introduced.
+	 */
+
+	/*
+	 * This generation number is used to test if the new fields are valid
+	 * and up to date while reading the root item. Every time the root item
+	 * is written out, the "generation" field is copied into this field. If
+	 * anyone ever mounted the fs with an older kernel, we will have
+	 * mismatching generation values here and thus must invalidate the
+	 * new fields. See btrfs_update_root and btrfs_find_last_root for
+	 * details.
+	 * The offset of generation_v2 is also used as the start for the memset
+	 * when invalidating the fields.
+	 */
+	__le64 generation_v2;
+	__u8 uuid[BTRFS_UUID_SIZE];
+	__u8 parent_uuid[BTRFS_UUID_SIZE];
+	__u8 received_uuid[BTRFS_UUID_SIZE];
+	__le64 ctransid; /* Updated when an inode changes */
+	__le64 otransid; /* Trans when created */
+	__le64 stransid; /* Trans when sent. Non-zero for received subvol. */
+	__le64 rtransid; /* Trans when received. Non-zero for received subvol.*/
+	struct btrfs_timespec ctime;
+	struct btrfs_timespec otime;
+	struct btrfs_timespec stime;
+	struct btrfs_timespec rtime;
+	__le64 reserved[8]; /* For future */
+} __attribute__ ((__packed__));
+
+/* This is used for both forward and backward root refs */
+struct btrfs_root_ref {
+	__le64 dirid;
+	__le64 sequence;
+	__le16 name_len;
+} __attribute__ ((__packed__));
+
+struct btrfs_disk_balance_args {
+	/*
+	 * Profiles to operate on.
+	 *
+	 * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE.
+	 */
+	__le64 profiles;
+
+	/*
+	 * Usage filter
+	 * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
+	 * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
+	 */
+	union {
+		__le64 usage;
+		struct {
+			__le32 usage_min;
+			__le32 usage_max;
+		};
+	};
+
+	/* Devid filter */
+	__le64 devid;
+
+	/* Devid subset filter [pstart..pend) */
+	__le64 pstart;
+	__le64 pend;
+
+	/* Btrfs virtual address space subset filter [vstart..vend) */
+	__le64 vstart;
+	__le64 vend;
+
+	/*
+	 * Profile to convert to.
+	 *
+	 * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE.
+	 */
+	__le64 target;
+
+	/* BTRFS_BALANCE_ARGS_* */
+	__le64 flags;
+
+	/*
+	 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'.
+	 * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
+	 * and maximum.
+	 */
+	union {
+		__le64 limit;
+		struct {
+			__le32 limit_min;
+			__le32 limit_max;
+		};
+	};
+
+	/*
+	 * Process chunks that cross stripes_min..stripes_max devices,
+	 * BTRFS_BALANCE_ARGS_STRIPES_RANGE.
+	 */
+	__le32 stripes_min;
+	__le32 stripes_max;
+
+	__le64 unused[6];
+} __attribute__ ((__packed__));
+
+/*
+ * Stores balance parameters to disk so that balance can be properly
+ * resumed after crash or unmount.
+ */
+struct btrfs_balance_item {
+	/* BTRFS_BALANCE_* */
+	__le64 flags;
+
+	struct btrfs_disk_balance_args data;
+	struct btrfs_disk_balance_args meta;
+	struct btrfs_disk_balance_args sys;
+
+	__le64 unused[4];
+} __attribute__ ((__packed__));
+
+enum {
+	BTRFS_FILE_EXTENT_INLINE   = 0,
+	BTRFS_FILE_EXTENT_REG      = 1,
+	BTRFS_FILE_EXTENT_PREALLOC = 2,
+	BTRFS_NR_FILE_EXTENT_TYPES = 3,
+};
+
+enum btrfs_compression_type {
+	BTRFS_COMPRESS_NONE  = 0,
+	BTRFS_COMPRESS_ZLIB  = 1,
+	BTRFS_COMPRESS_LZO   = 2,
+	BTRFS_COMPRESS_ZSTD  = 3,
+	BTRFS_NR_COMPRESS_TYPES = 4,
+};
+
+struct btrfs_file_extent_item {
+	/* Transaction id that created this extent */
+	__le64 generation;
+	/*
+	 * Max number of bytes to hold this extent in ram.
+	 *
+	 * When we split a compressed extent we can't know how big each of the
+	 * resulting pieces will be.  So, this is an upper limit on the size of
+	 * the extent in ram instead of an exact limit.
+	 */
+	__le64 ram_bytes;
+
+	/*
+	 * 32 bits for the various ways we might encode the data,
+	 * including compression and encryption.  If any of these
+	 * are set to something a given disk format doesn't understand
+	 * it is treated like an incompat flag for reading and writing,
+	 * but not for stat.
+	 */
+	__u8 compression;
+	__u8 encryption;
+	__le16 other_encoding; /* Spare for later use */
+
+	/* Are we inline data or a real extent? */
+	__u8 type;
+
+	/*
+	 * Disk space consumed by the extent, checksum blocks are not included
+	 * in these numbers
+	 *
+	 * At this offset in the structure, the inline extent data start.
+	 */
+	__le64 disk_bytenr;
+	__le64 disk_num_bytes;
+
+	/*
+	 * The logical offset inside the file extent.
+	 *
+	 * This allows a file extent to point into the middle of an existing
+	 * extent on disk, sharing it between two snapshots (useful if some
+	 * bytes in the middle of the extent have changed).
+	 */
+	__le64 offset;
+
+	/*
+	 * The logical number of bytes this file extent is referencing (no
+	 * csums included).
+	 *
+	 * This always reflects the size uncompressed and without encoding.
+	 */
+	__le64 num_bytes;
+
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+	__u8 csum;
+} __attribute__ ((__packed__));
+
+enum btrfs_dev_stat_values {
+	/* Disk I/O failure stats */
+	BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
+	BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
+	BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
+
+	/* Stats for indirect indications for I/O failures */
+	BTRFS_DEV_STAT_CORRUPTION_ERRS, /* Checksum error, bytenr error or
+					 * contents is illegal: this is an
+					 * indication that the block was damaged
+					 * during read or write, or written to
+					 * wrong location or read from wrong
+					 * location */
+	BTRFS_DEV_STAT_GENERATION_ERRS, /* An indication that blocks have not
+					 * been written */
+
+	BTRFS_DEV_STAT_VALUES_MAX
+};
+
+struct btrfs_dev_stats_item {
+	/*
+	 * Grow this item struct at the end for future enhancements and keep
+	 * the existing values unchanged.
+	 */
+	__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
+} __attribute__ ((__packed__));
+
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
+
+struct btrfs_dev_replace_item {
+	/*
+	 * Grow this item struct at the end for future enhancements and keep
+	 * the existing values unchanged.
+	 */
+	__le64 src_devid;
+	__le64 cursor_left;
+	__le64 cursor_right;
+	__le64 cont_reading_from_srcdev_mode;
+
+	__le64 replace_state;
+	__le64 time_started;
+	__le64 time_stopped;
+	__le64 num_write_errors;
+	__le64 num_uncorrectable_read_errors;
+} __attribute__ ((__packed__));
+
+/* Different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
+#define BTRFS_BLOCK_GROUP_RAID1C4       (1ULL << 10)
+#define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
+					 BTRFS_SPACE_INFO_GLOBAL_RSV)
+
+enum btrfs_raid_types {
+	BTRFS_RAID_RAID10,
+	BTRFS_RAID_RAID1,
+	BTRFS_RAID_DUP,
+	BTRFS_RAID_RAID0,
+	BTRFS_RAID_SINGLE,
+	BTRFS_RAID_RAID5,
+	BTRFS_RAID_RAID6,
+	BTRFS_RAID_RAID1C3,
+	BTRFS_RAID_RAID1C4,
+	BTRFS_NR_RAID_TYPES
+};
+
+#define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
+					 BTRFS_BLOCK_GROUP_SYSTEM |  \
+					 BTRFS_BLOCK_GROUP_METADATA)
+
+#define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
+					 BTRFS_BLOCK_GROUP_RAID1 |   \
+					 BTRFS_BLOCK_GROUP_RAID1C3 | \
+					 BTRFS_BLOCK_GROUP_RAID1C4 | \
+					 BTRFS_BLOCK_GROUP_RAID5 |   \
+					 BTRFS_BLOCK_GROUP_RAID6 |   \
+					 BTRFS_BLOCK_GROUP_DUP |     \
+					 BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
+					 BTRFS_BLOCK_GROUP_RAID6)
+
+#define BTRFS_BLOCK_GROUP_RAID1_MASK	(BTRFS_BLOCK_GROUP_RAID1 |   \
+					 BTRFS_BLOCK_GROUP_RAID1C3 | \
+					 BTRFS_BLOCK_GROUP_RAID1C4)
+
+/*
+ * We need a bit for restriper to be able to tell when chunks of type
+ * SINGLE are available.  This "extended" profile format is used in
+ * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
+ * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
+ * to avoid remappings between two formats in future.
+ */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)
+
+/*
+ * A fake block group type that is used to communicate global block reserve
+ * size to userspace via the SPACE_INFO ioctl.
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV	(1ULL << 49)
+
+#define BTRFS_EXTENDED_PROFILE_MASK	(BTRFS_BLOCK_GROUP_PROFILE_MASK | \
+					 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
+
+static inline __u64 chunk_to_extended(__u64 flags)
+{
+	if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
+		flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+	return flags;
+}
+static inline __u64 extended_to_chunk(__u64 flags)
+{
+	return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+}
+
+struct btrfs_block_group_item {
+	__le64 used;
+	__le64 chunk_objectid;
+	__le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_info {
+	__le32 extent_count;
+	__le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
+#define BTRFS_QGROUP_LEVEL_SHIFT		48
+static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
+{
+	return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
+/* Is subvolume quota turned on? */
+#define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
+
+/* Is qgroup rescan running? */
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
+
+/*
+ * Some qgroup entries are known to be out of date, either because the
+ * configuration has changed in a way that makes a rescan necessary, or
+ * because the fs has been mounted with a non-qgroup-aware version.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT	(1ULL << 2)
+
+#define BTRFS_QGROUP_STATUS_VERSION        1
+
+struct btrfs_qgroup_status_item {
+	__le64 version;
+	/*
+	 * The generation is updated during every commit. As older
+	 * versions of btrfs are not aware of qgroups, it will be
+	 * possible to detect inconsistencies by checking the
+	 * generation on mount time.
+	 */
+	__le64 generation;
+
+	/* Flag definitions see above */
+	__le64 flags;
+
+	/*
+	 * Only used during scanning to record the progress of the scan.
+	 * It contains a logical address.
+	 */
+	__le64 rescan;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_info_item {
+	__le64 generation;
+	__le64 rfer;
+	__le64 rfer_cmpr;
+	__le64 excl;
+	__le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+/*
+ * Flags definition for qgroup limits
+ *
+ * Used by:
+ * struct btrfs_qgroup_limit.flags
+ * struct btrfs_qgroup_limit_item.flags
+ */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER	(1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL	(1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER	(1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL	(1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR	(1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR	(1ULL << 5)
+
+struct btrfs_qgroup_limit_item {
+	/* Only updated when any of the other values change. */
+	__le64 flags;
+	__le64 max_rfer;
+	__le64 max_excl;
+	__le64 rsv_rfer;
+	__le64 rsv_excl;
+} __attribute__ ((__packed__));
+
+/*
+ * Just in case we somehow lose the roots and are not able to mount,
+ * we store an array of the roots from previous transactions in the super.
+ */
+#define BTRFS_NUM_BACKUP_ROOTS 4
+struct btrfs_root_backup {
+	__le64 tree_root;
+	__le64 tree_root_gen;
+
+	__le64 chunk_root;
+	__le64 chunk_root_gen;
+
+	__le64 extent_root;
+	__le64 extent_root_gen;
+
+	__le64 fs_root;
+	__le64 fs_root_gen;
+
+	__le64 dev_root;
+	__le64 dev_root_gen;
+
+	__le64 csum_root;
+	__le64 csum_root_gen;
+
+	__le64 total_bytes;
+	__le64 bytes_used;
+	__le64 num_devices;
+	/* future */
+	__le64 unused_64[4];
+
+	u8 tree_root_level;
+	u8 chunk_root_level;
+	u8 extent_root_level;
+	u8 fs_root_level;
+	u8 dev_root_level;
+	u8 csum_root_level;
+	/* future and to align */
+	u8 unused_8[10];
+} __attribute__ ((__packed__));
+
+/*
+ * This is a very generous portion of the super block, giving us room to
+ * translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+
+#define BTRFS_LABEL_SIZE 256
+
+/* The super block basically lists the main trees of the FS. */
+struct btrfs_super_block {
+	/* The first 4 fields must match struct btrfs_header */
+	u8 csum[BTRFS_CSUM_SIZE];
+	/* FS specific UUID, visible to user */
+	u8 fsid[BTRFS_FSID_SIZE];
+	__le64 bytenr; /* this block number */
+	__le64 flags;
+
+	/* Allowed to be different from the btrfs_header from here own down. */
+	__le64 magic;
+	__le64 generation;
+	__le64 root;
+	__le64 chunk_root;
+	__le64 log_root;
+
+	/* This will help find the new super based on the log root. */
+	__le64 log_root_transid;
+	__le64 total_bytes;
+	__le64 bytes_used;
+	__le64 root_dir_objectid;
+	__le64 num_devices;
+	__le32 sectorsize;
+	__le32 nodesize;
+	__le32 __unused_leafsize;
+	__le32 stripesize;
+	__le32 sys_chunk_array_size;
+	__le64 chunk_root_generation;
+	__le64 compat_flags;
+	__le64 compat_ro_flags;
+	__le64 incompat_flags;
+	__le16 csum_type;
+	u8 root_level;
+	u8 chunk_root_level;
+	u8 log_root_level;
+	struct btrfs_dev_item dev_item;
+
+	char label[BTRFS_LABEL_SIZE];
+
+	__le64 cache_generation;
+	__le64 uuid_tree_generation;
+
+	/* The UUID written into btree blocks */
+	u8 metadata_uuid[BTRFS_FSID_SIZE];
+
+	/* Future expansion */
+	__le64 reserved[28];
+	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+	struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
+} __attribute__ ((__packed__));
+
+/*
+ * Feature flags
+ *
+ * Used by:
+ * struct btrfs_super_block::(compat|compat_ro|incompat)_flags
+ * struct btrfs_ioctl_feature_flags
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE		(1ULL << 0)
+
+/*
+ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
+ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
+ * < 4.7.3).  If this bit is clear, then the free space tree cannot be trusted.
+ * btrfs-progs can also intentionally clear this bit to ask the kernel to
+ * rebuild the free space tree, however this might not work on older kernels
+ * that do not know about this bit. If not sure, clear the cache manually on
+ * first mount when booting older kernel versions.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID	(1ULL << 1)
+
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO	(1ULL << 3)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD	(1ULL << 4)
+
+/*
+ * Older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy.  Lets not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA	(1ULL << 5)
+
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF	(1ULL << 6)
+#define BTRFS_FEATURE_INCOMPAT_RAID56		(1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA	(1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES		(1ULL << 9)
+#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
+#define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
+
+/*
+ * Compat flags that we support.
+ *
+ * If any incompat flags are set other than the ones specified below then we
+ * will fail to mount.
+ */
+#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP			\
+	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
+	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
+
+#define BTRFS_FEATURE_INCOMPAT_SUPP			\
+	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
+	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
+	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
+	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
+	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
+	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
+	 BTRFS_FEATURE_INCOMPAT_NO_HOLES	|	\
+	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID	|	\
+	 BTRFS_FEATURE_INCOMPAT_RAID1C34)
+
+#define BTRFS_FEATURE_INCOMPAT_SAFE_SET			\
+	(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR		0ULL
+
+#define BTRFS_BACKREF_REV_MAX		256
+#define BTRFS_BACKREF_REV_SHIFT		56
+#define BTRFS_BACKREF_REV_MASK		(((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+					 BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV		0
+#define BTRFS_MIXED_BACKREF_REV		1
+
+#define BTRFS_MAX_LEVEL 8
+
+/* Every tree block (leaf or node) starts with this header. */
+struct btrfs_header {
+	/* These first four must match the super block */
+	u8 csum[BTRFS_CSUM_SIZE];
+	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+	__le64 bytenr; /* Which block this node is supposed to live in */
+	__le64 flags;
+
+	/* Allowed to be different from the super from here on down. */
+	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+	__le64 generation;
+	__le64 owner;
+	__le32 nritems;
+	u8 level;
+} __attribute__ ((__packed__));
+
+/*
+ * A leaf is full of items. Offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area).
+ */
+struct btrfs_item {
+	struct btrfs_disk_key key;
+	__le32 offset;
+	__le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+	struct btrfs_header header;
+	struct btrfs_item items[];
+} __attribute__ ((__packed__));
+
+/*
+ * All non-leaf blocks are nodes, they hold only keys and pointers to children
+ * blocks.
+ */
+struct btrfs_key_ptr {
+	struct btrfs_disk_key key;
+	__le64 blockptr;
+	__le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+	struct btrfs_header header;
+	struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+#endif /* __BTRFS_TREE_H__ */
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
new file mode 100644
index 00000000000..a39ad72067d
--- /dev/null
+++ b/fs/btrfs/root-tree.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "ctree.h"
+
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+			struct btrfs_root_item *item, struct btrfs_key *key)
+{
+	struct btrfs_path *path;
+	struct btrfs_key search_key;
+	struct btrfs_key found_key;
+	struct extent_buffer *l;
+	int ret;
+	int slot;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	search_key.objectid = objectid;
+	search_key.type = BTRFS_ROOT_ITEM_KEY;
+	search_key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (path->slots[0] == 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	BUG_ON(ret == 0);
+	l = path->nodes[0];
+	slot = path->slots[0] - 1;
+	btrfs_item_key_to_cpu(l, &found_key, slot);
+	if (found_key.type != BTRFS_ROOT_ITEM_KEY ||
+	    found_key.objectid != objectid) {
+		ret = -ENOENT;
+		goto out;
+	}
+	read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+			   sizeof(*item));
+	memcpy(key, &found_key, sizeof(found_key));
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
diff --git a/fs/btrfs/root.c b/fs/btrfs/root.c
deleted file mode 100644
index 127b67fd1c8..00000000000
--- a/fs/btrfs/root.c
+++ /dev/null
@@ -1,92 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-
-static void read_root_item(struct btrfs_path *p, struct btrfs_root_item *item)
-{
-	u32 len;
-	int reset = 0;
-
-	len = btrfs_path_item_size(p);
-	memcpy(item, btrfs_path_item_ptr(p, struct btrfs_root_item), len);
-	btrfs_root_item_to_cpu(item);
-
-	if (len < sizeof(*item))
-		reset = 1;
-	if (!reset && item->generation != item->generation_v2) {
-		if (item->generation_v2 != 0)
-			printf("%s: generation != generation_v2 in root item",
-			       __func__);
-		reset = 1;
-	}
-	if (reset) {
-		memset(&item->generation_v2, 0,
-		       sizeof(*item) - offsetof(struct btrfs_root_item,
-						generation_v2));
-	}
-}
-
-int btrfs_find_root(u64 objectid, struct btrfs_root *root,
-		    struct btrfs_root_item *root_item)
-{
-	struct btrfs_path path;
-	struct btrfs_root_item my_root_item;
-
-	if (!btrfs_search_tree_key_type(&btrfs_info.tree_root, objectid,
-					BTRFS_ROOT_ITEM_KEY, &path))
-		return -1;
-
-	if (!root_item)
-		root_item = &my_root_item;
-	read_root_item(&path, root_item);
-
-	if (root) {
-		root->objectid = objectid;
-		root->bytenr = root_item->bytenr;
-		root->root_dirid = root_item->root_dirid;
-	}
-
-	btrfs_free_path(&path);
-	return 0;
-}
-
-u64 btrfs_lookup_root_ref(u64 subvolid, struct btrfs_root_ref *refp, char *name)
-{
-	struct btrfs_path path;
-	struct btrfs_key *key;
-	struct btrfs_root_ref *ref;
-	u64 res = -1ULL;
-
-	key = btrfs_search_tree_key_type(&btrfs_info.tree_root, subvolid,
-					       BTRFS_ROOT_BACKREF_KEY, &path);
-
-	if (!key)
-		return -1ULL;
-
-	ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref);
-	btrfs_root_ref_to_cpu(ref);
-
-	if (refp)
-		*refp = *ref;
-
-	if (name) {
-		if (ref->name_len > BTRFS_VOL_NAME_MAX) {
-			printf("%s: volume name too long: %u\n", __func__,
-			       ref->name_len);
-			goto out;
-		}
-
-		memcpy(name, ref + 1, ref->name_len);
-	}
-
-	res = key->offset;
-out:
-	btrfs_free_path(&path);
-	return res;
-}
-
diff --git a/fs/btrfs/subvolume.c b/fs/btrfs/subvolume.c
index 06e54f33109..2815673bcd6 100644
--- a/fs/btrfs/subvolume.c
+++ b/fs/btrfs/subvolume.c
@@ -5,126 +5,230 @@
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
 #include <malloc.h>
+#include "ctree.h"
+#include "btrfs.h"
+#include "disk-io.h"
 
-static int get_subvol_name(u64 subvolid, char *name, int max_len)
+/*
+ * Resolve the path of ino inside subvolume @root into @path_ret.
+ *
+ * @path_ret must be at least PATH_MAX size.
+ */
+static int get_path_in_subvol(struct btrfs_root *root, u64 ino, char *path_ret)
 {
-	struct btrfs_root_ref rref;
-	struct btrfs_inode_ref iref;
-	struct btrfs_root root;
-	u64 dir;
-	char tmp[max(BTRFS_VOL_NAME_MAX, BTRFS_NAME_MAX)];
-	char *ptr;
-
-	ptr = name + max_len - 1;
-	*ptr = '\0';
-
-	while (subvolid != BTRFS_FS_TREE_OBJECTID) {
-		subvolid = btrfs_lookup_root_ref(subvolid, &rref, tmp);
-
-		if (subvolid == -1ULL)
-			return -1;
-
-		ptr -= rref.name_len + 1;
-		if (ptr < name)
-			goto too_long;
-
-		memcpy(ptr + 1, tmp, rref.name_len);
-		*ptr = '/';
-
-		if (btrfs_find_root(subvolid, &root, NULL))
-			return -1;
-
-		dir = rref.dirid;
-
-		while (dir != BTRFS_FIRST_FREE_OBJECTID) {
-			dir = btrfs_lookup_inode_ref(&root, dir, &iref, tmp);
-
-			if (dir == -1ULL)
-				return -1;
-
-			ptr -= iref.name_len + 1;
-			if (ptr < name)
-				goto too_long;
-
-			memcpy(ptr + 1, tmp, iref.name_len);
-			*ptr = '/';
+	struct btrfs_path path;
+	struct btrfs_key key;
+	char *tmp;
+	u64 cur = ino;
+	int ret = 0;
+
+	tmp = malloc(PATH_MAX);
+	if (!tmp)
+		return -ENOMEM;
+	tmp[0] = '\0';
+
+	btrfs_init_path(&path);
+	while (cur != BTRFS_FIRST_FREE_OBJECTID) {
+		struct btrfs_inode_ref *iref;
+		int name_len;
+
+		btrfs_release_path(&path);
+		key.objectid = cur;
+		key.type = BTRFS_INODE_REF_KEY;
+		key.offset = (u64)-1;
+
+		ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+		/* Impossible */
+		if (ret == 0)
+			ret = -EUCLEAN;
+		if (ret < 0)
+			goto out;
+		ret = btrfs_previous_item(root, &path, cur,
+					  BTRFS_INODE_REF_KEY);
+		if (ret > 0)
+			ret = -ENOENT;
+		if (ret < 0)
+			goto out;
+
+		strncpy(tmp, path_ret, PATH_MAX);
+		iref = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				      struct btrfs_inode_ref);
+		name_len = btrfs_inode_ref_name_len(path.nodes[0],
+						    iref);
+		if (name_len > BTRFS_NAME_LEN) {
+			ret = -ENAMETOOLONG;
+			goto out;
 		}
+		read_extent_buffer(path.nodes[0], path_ret,
+				   (unsigned long)(iref + 1), name_len);
+		path_ret[name_len] = '/';
+		path_ret[name_len + 1] = '\0';
+		strncat(path_ret, tmp, PATH_MAX);
+
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		cur = key.offset;
 	}
-
-	if (ptr == name + max_len - 1) {
-		name[0] = '/';
-		name[1] = '\0';
-	} else {
-		memmove(name, ptr, name + max_len - ptr);
-	}
-
-	return 0;
-
-too_long:
-	printf("%s: subvolume name too long\n", __func__);
-	return -1;
+out:
+	btrfs_release_path(&path);
+	free(tmp);
+	return ret;
 }
 
-u64 btrfs_get_default_subvol_objectid(void)
+static int list_one_subvol(struct btrfs_root *root, char *path_ret)
 {
-	struct btrfs_dir_item item;
-
-	if (btrfs_lookup_dir_item(&btrfs_info.tree_root,
-				  btrfs_info.sb.root_dir_objectid, "default", 7,
-				  &item))
-		return BTRFS_FS_TREE_OBJECTID;
-	return item.location.objectid;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	char *tmp;
+	u64 cur = root->root_key.objectid;
+	int ret = 0;
+
+	tmp = malloc(PATH_MAX);
+	if (!tmp)
+		return -ENOMEM;
+	tmp[0] = '\0';
+	path_ret[0] = '\0';
+	btrfs_init_path(&path);
+	while (cur != BTRFS_FS_TREE_OBJECTID) {
+		struct btrfs_root_ref *rr;
+		struct btrfs_key location;
+		int name_len;
+		u64 ino;
+
+		key.objectid = cur;
+		key.type = BTRFS_ROOT_BACKREF_KEY;
+		key.offset = (u64)-1;
+		btrfs_release_path(&path);
+
+		ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+		if (ret == 0)
+			ret = -EUCLEAN;
+		if (ret < 0)
+			goto out;
+		ret = btrfs_previous_item(tree_root, &path, cur,
+					  BTRFS_ROOT_BACKREF_KEY);
+		if (ret > 0)
+			ret = -ENOENT;
+		if (ret < 0)
+			goto out;
+
+		/* Get the subvolume name */
+		rr = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				    struct btrfs_root_ref);
+		strncpy(tmp, path_ret, PATH_MAX);
+		name_len = btrfs_root_ref_name_len(path.nodes[0], rr);
+		if (name_len > BTRFS_NAME_LEN) {
+			ret = -ENAMETOOLONG;
+			goto out;
+		}
+		ino = btrfs_root_ref_dirid(path.nodes[0], rr);
+		read_extent_buffer(path.nodes[0], path_ret,
+				   (unsigned long)(rr + 1), name_len);
+		path_ret[name_len] = '/';
+		path_ret[name_len + 1] = '\0';
+		strncat(path_ret, tmp, PATH_MAX);
+
+		/* Get the path inside the parent subvolume */
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		location.objectid = key.offset;
+		location.type = BTRFS_ROOT_ITEM_KEY;
+		location.offset = (u64)-1;
+		root = btrfs_read_fs_root(fs_info, &location);
+		if (IS_ERR(root)) {
+			ret = PTR_ERR(root);
+			goto out;
+		}
+		ret = get_path_in_subvol(root, ino, path_ret);
+		if (ret < 0)
+			goto out;
+		cur = key.offset;
+	}
+	/* Add the leading '/' */
+	strncpy(tmp, path_ret, PATH_MAX);
+	strncpy(path_ret, "/", PATH_MAX);
+	strncat(path_ret, tmp, PATH_MAX);
+out:
+	btrfs_release_path(&path);
+	free(tmp);
+	return ret;
 }
 
-static void list_subvols(u64 tree, char *nameptr, int max_name_len, int level)
+static int list_subvolums(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_key key, *found_key;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *root;
 	struct btrfs_path path;
-	struct btrfs_root_ref *ref;
-	int res;
-
-	key.objectid = tree;
-	key.type = BTRFS_ROOT_REF_KEY;
+	struct btrfs_key key;
+	char *result;
+	int ret = 0;
+
+	result = malloc(PATH_MAX);
+	if (!result)
+		return -ENOMEM;
+
+	ret = list_one_subvol(fs_info->fs_root, result);
+	if (ret < 0)
+		goto out;
+	root = fs_info->fs_root;
+	printf("ID %llu gen %llu path %.*s\n",
+		root->root_key.objectid, btrfs_root_generation(&root->root_item),
+		PATH_MAX, result);
+
+	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
 	key.offset = 0;
-
-	if (btrfs_search_tree(&btrfs_info.tree_root, &key, &path))
-		return;
-
-	do {
-		found_key = btrfs_path_leaf_key(&path);
-		if (btrfs_comp_keys_type(&key, found_key))
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+	while (1) {
+		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
+			goto next;
+
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
 			break;
-
-		ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref);
-		btrfs_root_ref_to_cpu(ref);
-
-		printf("ID %llu parent %llu name ", found_key->offset, tree);
-		if (nameptr && !get_subvol_name(found_key->offset, nameptr,
-						max_name_len))
-			printf("%s\n", nameptr);
-		else
-			printf("%.*s\n", (int) ref->name_len,
-			       (const char *) (ref + 1));
-
-		if (level > 0)
-			list_subvols(found_key->offset, nameptr, max_name_len,
-				     level - 1);
-		else
-			printf("%s: Too much recursion, maybe skipping some "
-			       "subvolumes\n", __func__);
-	} while (!(res = btrfs_next_slot(&path)));
-
-	btrfs_free_path(&path);
+		if (key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
+		    key.type != BTRFS_ROOT_ITEM_KEY)
+			goto next;
+		key.offset = (u64)-1;
+		root = btrfs_read_fs_root(fs_info, &key);
+		if (IS_ERR(root)) {
+			ret = PTR_ERR(root);
+			if (ret == -ENOENT)
+				goto next;
+		}
+		ret = list_one_subvol(root, result);
+		if (ret < 0)
+			goto out;
+		printf("ID %llu gen %llu path %.*s\n",
+			root->root_key.objectid,
+			btrfs_root_generation(&root->root_item),
+			PATH_MAX, result);
+next:
+		ret = btrfs_next_item(tree_root, &path);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			ret = 0;
+			break;
+		}
+	}
+out:
+	free(result);
+	return ret;
 }
 
 void btrfs_list_subvols(void)
 {
-	char *nameptr = malloc(4096);
+	struct btrfs_fs_info *fs_info = current_fs_info;
+	int ret;
 
-	list_subvols(BTRFS_FS_TREE_OBJECTID, nameptr, nameptr ? 4096 : 0, 40);
-
-	if (nameptr)
-		free(nameptr);
+	if (!fs_info)
+		return;
+	ret = list_subvolums(fs_info);
+	if (ret < 0)
+		error("failed to list subvolume: %d", ret);
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
deleted file mode 100644
index 913a4d402e9..00000000000
--- a/fs/btrfs/super.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include <common.h>
-#include <log.h>
-#include <memalign.h>
-#include <part.h>
-#include <linux/compat.h>
-#include "btrfs.h"
-
-#define BTRFS_SUPER_FLAG_SUPP	(BTRFS_HEADER_FLAG_WRITTEN	\
-				 | BTRFS_HEADER_FLAG_RELOC	\
-				 | BTRFS_SUPER_FLAG_ERROR	\
-				 | BTRFS_SUPER_FLAG_SEEDING	\
-				 | BTRFS_SUPER_FLAG_METADUMP)
-
-#define BTRFS_SUPER_INFO_SIZE	4096
-
-/*
- * checks if a valid root backup is present.
- * considers the case when all root backups empty valid.
- * returns -1 in case of invalid root backup and 0 for valid.
- */
-static int btrfs_check_super_roots(struct btrfs_super_block *sb)
-{
-	struct btrfs_root_backup *root_backup;
-	int i, newest = -1;
-	int num_empty = 0;
-
-	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; ++i) {
-		root_backup = sb->super_roots + i;
-
-		if (root_backup->tree_root == 0 && root_backup->tree_root_gen == 0)
-			num_empty++;
-
-		if (root_backup->tree_root_gen == sb->generation)
-			newest = i;
-	}
-
-	if (num_empty == BTRFS_NUM_BACKUP_ROOTS) {
-		return 0;
-	} else if (newest >= 0) {
-		return 0;
-	}
-
-	return -1;
-}
-
-static inline int is_power_of_2(u64 x)
-{
-	return !(x & (x - 1));
-}
-
-static int btrfs_check_super_csum(char *raw_disk_sb)
-{
-	struct btrfs_super_block *disk_sb =
-		(struct btrfs_super_block *) raw_disk_sb;
-	u16 csum_type = le16_to_cpu(disk_sb->csum_type);
-
-	if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
-		u32 crc = ~(u32) 0;
-		const int csum_size = sizeof(crc);
-		char result[csum_size];
-
-		crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, crc,
-				      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
-		btrfs_csum_final(crc, result);
-
-		if (memcmp(raw_disk_sb, result, csum_size))
-			return -1;
-	} else {
-		return -1;
-	}
-
-	return 0;
-}
-
-static int btrfs_check_super(struct btrfs_super_block *sb)
-{
-	int ret = 0;
-
-	if (sb->flags & ~BTRFS_SUPER_FLAG_SUPP) {
-		printf("%s: Unsupported flags: %llu\n", __func__,
-		       sb->flags & ~BTRFS_SUPER_FLAG_SUPP);
-	}
-
-	if (sb->root_level > BTRFS_MAX_LEVEL) {
-		printf("%s: tree_root level too big: %d >= %d\n", __func__,
-		       sb->root_level, BTRFS_MAX_LEVEL);
-		ret = -1;
-	}
-
-	if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
-		printf("%s: chunk_root level too big: %d >= %d\n", __func__,
-		       sb->chunk_root_level, BTRFS_MAX_LEVEL);
-		ret = -1;
-	}
-
-	if (sb->log_root_level > BTRFS_MAX_LEVEL) {
-		printf("%s: log_root level too big: %d >= %d\n", __func__,
-		       sb->log_root_level, BTRFS_MAX_LEVEL);
-		ret = -1;
-	}
-
-	if (!is_power_of_2(sb->sectorsize) || sb->sectorsize < 4096 ||
-	    sb->sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-		printf("%s: invalid sectorsize %u\n", __func__,
-		       sb->sectorsize);
-		ret = -1;
-	}
-
-	if (!is_power_of_2(sb->nodesize) || sb->nodesize < sb->sectorsize ||
-	    sb->nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-		printf("%s: invalid nodesize %u\n", __func__, sb->nodesize);
-		ret = -1;
-	}
-
-	if (sb->nodesize != sb->__unused_leafsize) {
-		printf("%s: invalid leafsize %u, should be %u\n", __func__,
-		       sb->__unused_leafsize, sb->nodesize);
-		ret = -1;
-	}
-
-	if (!IS_ALIGNED(sb->root, sb->sectorsize)) {
-		printf("%s: tree_root block unaligned: %llu\n", __func__,
-		       sb->root);
-		ret = -1;
-	}
-
-	if (!IS_ALIGNED(sb->chunk_root, sb->sectorsize)) {
-		printf("%s: chunk_root block unaligned: %llu\n", __func__,
-		       sb->chunk_root);
-		ret = -1;
-	}
-
-	if (!IS_ALIGNED(sb->log_root, sb->sectorsize)) {
-		printf("%s: log_root block unaligned: %llu\n", __func__,
-		       sb->log_root);
-		ret = -1;
-	}
-
-	if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
-		printf("%s: dev_item UUID does not match fsid\n", __func__);
-		ret = -1;
-	}
-
-	if (sb->bytes_used < 6*sb->nodesize) {
-		printf("%s: bytes_used is too small %llu\n", __func__,
-		       sb->bytes_used);
-		ret = -1;
-	}
-
-	if (!is_power_of_2(sb->stripesize)) {
-		printf("%s: invalid stripesize %u\n", __func__, sb->stripesize);
-		ret = -1;
-	}
-
-	if (sb->sys_chunk_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
-		printf("%s: system chunk array too big %u > %u\n", __func__,
-		       sb->sys_chunk_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
-		ret = -1;
-	}
-
-	if (sb->sys_chunk_array_size < sizeof(struct btrfs_key) +
-	    sizeof(struct btrfs_chunk)) {
-		printf("%s: system chunk array too small %u < %zu\n", __func__,
-		       sb->sys_chunk_array_size, sizeof(struct btrfs_key)
-		       + sizeof(struct btrfs_chunk));
-		ret = -1;
-	}
-
-	return ret;
-}
-
-int btrfs_read_superblock(void)
-{
-	const u64 superblock_offsets[4] = {
-		0x10000ull,
-		0x4000000ull,
-		0x4000000000ull,
-		0x4000000000000ull
-	};
-	ALLOC_CACHE_ALIGN_BUFFER(char, raw_sb, BTRFS_SUPER_INFO_SIZE);
-	struct btrfs_super_block *sb = (struct btrfs_super_block *) raw_sb;
-	u64 dev_total_bytes;
-	int i;
-
-	dev_total_bytes = (u64) btrfs_part_info->size * btrfs_part_info->blksz;
-
-	btrfs_info.sb.generation = 0;
-
-	for (i = 0; i < 4; ++i) {
-		if (superblock_offsets[i] + sizeof(sb) > dev_total_bytes)
-			break;
-
-		if (!btrfs_devread(superblock_offsets[i], BTRFS_SUPER_INFO_SIZE,
-				   raw_sb))
-			break;
-
-		if (btrfs_check_super_csum(raw_sb)) {
-			debug("%s: invalid checksum at superblock mirror %i\n",
-			      __func__, i);
-			continue;
-		}
-
-		btrfs_super_block_to_cpu(sb);
-
-		if (sb->magic != BTRFS_MAGIC) {
-			debug("%s: invalid BTRFS magic 0x%016llX at "
-			      "superblock mirror %i\n", __func__, sb->magic, i);
-		} else if (sb->bytenr != superblock_offsets[i]) {
-			printf("%s: invalid bytenr 0x%016llX (expected "
-			       "0x%016llX) at superblock mirror %i\n",
-			       __func__, sb->bytenr, superblock_offsets[i], i);
-		} else if (btrfs_check_super(sb)) {
-			printf("%s: Checking superblock mirror %i failed\n",
-			       __func__, i);
-		} else if (sb->generation > btrfs_info.sb.generation) {
-			memcpy(&btrfs_info.sb, sb, sizeof(*sb));
-		} else {
-			/* Nothing */
-		}
-	}
-
-	if (!btrfs_info.sb.generation) {
-		debug("%s: No valid BTRFS superblock found!\n", __func__);
-		return -1;
-	}
-
-	if (btrfs_check_super_roots(&btrfs_info.sb)) {
-		printf("%s: No valid root_backup found!\n", __func__);
-		return -1;
-	}
-
-	if (sb->sectorsize != PAGE_SIZE) {
-		printf(
-	"%s: Unsupported sector size (%u), only supports %u as sector size\n",
-			__func__, sb->sectorsize, PAGE_SIZE);
-		return -1;
-	}
-
-	if (btrfs_info.sb.num_devices != 1) {
-		printf("%s: Unsupported number of devices (%lli). This driver "
-		       "only supports filesystem on one device.\n", __func__,
-		       btrfs_info.sb.num_devices);
-		return -1;
-	}
-
-	debug("Chosen superblock with generation = %llu\n",
-	      btrfs_info.sb.generation);
-
-	return 0;
-}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
new file mode 100644
index 00000000000..fcf52d4b0ff
--- /dev/null
+++ b/fs/btrfs/volumes.c
@@ -0,0 +1,1173 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <stdlib.h>
+#include <common.h>
+#include <fs_internal.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "extent-io.h"
+
+const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+	[BTRFS_RAID_RAID10] = {
+		.sub_stripes	= 2,
+		.dev_stripes	= 1,
+		.devs_max	= 0,	/* 0 == as many as possible */
+		.devs_min	= 4,
+		.tolerated_failures = 1,
+		.devs_increment	= 2,
+		.ncopies	= 2,
+		.nparity	= 0,
+		.raid_name	= "raid10",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID10,
+	},
+	[BTRFS_RAID_RAID1] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 2,
+		.devs_min	= 2,
+		.tolerated_failures = 1,
+		.devs_increment	= 2,
+		.ncopies	= 2,
+		.nparity	= 0,
+		.raid_name	= "raid1",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1,
+	},
+	[BTRFS_RAID_RAID1C3] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 3,
+		.devs_min	= 3,
+		.tolerated_failures = 2,
+		.devs_increment	= 3,
+		.ncopies	= 3,
+		.raid_name	= "raid1c3",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1C3,
+	},
+	[BTRFS_RAID_RAID1C4] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 4,
+		.devs_min	= 4,
+		.tolerated_failures = 3,
+		.devs_increment	= 4,
+		.ncopies	= 4,
+		.raid_name	= "raid1c4",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1C4,
+	},
+	[BTRFS_RAID_DUP] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 2,
+		.devs_max	= 1,
+		.devs_min	= 1,
+		.tolerated_failures = 0,
+		.devs_increment	= 1,
+		.ncopies	= 2,
+		.nparity	= 0,
+		.raid_name	= "dup",
+		.bg_flag	= BTRFS_BLOCK_GROUP_DUP,
+	},
+	[BTRFS_RAID_RAID0] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 0,
+		.devs_min	= 2,
+		.tolerated_failures = 0,
+		.devs_increment	= 1,
+		.ncopies	= 1,
+		.nparity	= 0,
+		.raid_name	= "raid0",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID0,
+	},
+	[BTRFS_RAID_SINGLE] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 1,
+		.devs_min	= 1,
+		.tolerated_failures = 0,
+		.devs_increment	= 1,
+		.ncopies	= 1,
+		.nparity	= 0,
+		.raid_name	= "single",
+		.bg_flag	= 0,
+	},
+	[BTRFS_RAID_RAID5] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 0,
+		.devs_min	= 2,
+		.tolerated_failures = 1,
+		.devs_increment	= 1,
+		.ncopies	= 1,
+		.nparity	= 1,
+		.raid_name	= "raid5",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID5,
+	},
+	[BTRFS_RAID_RAID6] = {
+		.sub_stripes	= 1,
+		.dev_stripes	= 1,
+		.devs_max	= 0,
+		.devs_min	= 3,
+		.tolerated_failures = 2,
+		.devs_increment	= 1,
+		.ncopies	= 1,
+		.nparity	= 2,
+		.raid_name	= "raid6",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID6,
+	},
+};
+
+struct stripe {
+	struct btrfs_device *dev;
+	u64 physical;
+};
+
+static inline int nr_parity_stripes(struct map_lookup *map)
+{
+	if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+		return 1;
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+		return 2;
+	else
+		return 0;
+}
+
+static inline int nr_data_stripes(struct map_lookup *map)
+{
+	return map->num_stripes - nr_parity_stripes(map);
+}
+
+#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )
+
+static LIST_HEAD(fs_uuids);
+
+/*
+ * Find a device specified by @devid or @uuid in the list of @fs_devices, or
+ * return NULL.
+ *
+ * If devid and uuid are both specified, the match must be exact, otherwise
+ * only devid is used.
+ */
+static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
+		u64 devid, u8 *uuid)
+{
+	struct list_head *head = &fs_devices->devices;
+	struct btrfs_device *dev;
+
+	list_for_each_entry(dev, head, dev_list) {
+		if (dev->devid == devid &&
+		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
+			return dev;
+		}
+	}
+	return NULL;
+}
+
+static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid)
+{
+	struct btrfs_fs_devices *fs_devices;
+
+	list_for_each_entry(fs_devices, &fs_uuids, list) {
+		if (metadata_uuid && (memcmp(fsid, fs_devices->fsid,
+					     BTRFS_FSID_SIZE) == 0) &&
+		    (memcmp(metadata_uuid, fs_devices->metadata_uuid,
+			    BTRFS_FSID_SIZE) == 0)) {
+			return fs_devices;
+		} else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0){
+			return fs_devices;
+		}
+	}
+	return NULL;
+}
+
+static int device_list_add(struct btrfs_super_block *disk_super,
+			   u64 devid, struct blk_desc *desc,
+			   struct disk_partition *part,
+			   struct btrfs_fs_devices **fs_devices_ret)
+{
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices;
+	u64 found_transid = btrfs_super_generation(disk_super);
+	bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
+		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+	if (metadata_uuid)
+		fs_devices = find_fsid(disk_super->fsid,
+				       disk_super->metadata_uuid);
+	else
+		fs_devices = find_fsid(disk_super->fsid, NULL);
+
+	if (!fs_devices) {
+		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+		if (!fs_devices)
+			return -ENOMEM;
+		INIT_LIST_HEAD(&fs_devices->devices);
+		list_add(&fs_devices->list, &fs_uuids);
+		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+		if (metadata_uuid)
+			memcpy(fs_devices->metadata_uuid,
+			       disk_super->metadata_uuid, BTRFS_FSID_SIZE);
+		else
+			memcpy(fs_devices->metadata_uuid, fs_devices->fsid,
+			       BTRFS_FSID_SIZE);
+
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+		fs_devices->lowest_devid = (u64)-1;
+		device = NULL;
+	} else {
+		device = find_device(fs_devices, devid,
+				    disk_super->dev_item.uuid);
+	}
+	if (!device) {
+		device = kzalloc(sizeof(*device), GFP_NOFS);
+		if (!device) {
+			/* we can safely leave the fs_devices entry around */
+			return -ENOMEM;
+		}
+		device->devid = devid;
+		device->desc = desc;
+		device->part = part;
+		device->generation = found_transid;
+		memcpy(device->uuid, disk_super->dev_item.uuid,
+		       BTRFS_UUID_SIZE);
+		device->total_devs = btrfs_super_num_devices(disk_super);
+		device->super_bytes_used = btrfs_super_bytes_used(disk_super);
+		device->total_bytes =
+			btrfs_stack_device_total_bytes(&disk_super->dev_item);
+		device->bytes_used =
+			btrfs_stack_device_bytes_used(&disk_super->dev_item);
+		list_add(&device->dev_list, &fs_devices->devices);
+		device->fs_devices = fs_devices;
+	} else if (!device->desc || !device->part) {
+		/*
+		 * The existing device has newer generation, so this one could
+		 * be a stale one, don't add it.
+		 */
+		if (found_transid < device->generation) {
+			error(
+	"adding devid %llu gen %llu but found an existing device gen %llu",
+				device->devid, found_transid,
+				device->generation);
+			return -EEXIST;
+		} else {
+			device->desc = desc;
+			device->part = part;
+		}
+	}
+
+
+	if (found_transid > fs_devices->latest_trans) {
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+	}
+	if (fs_devices->lowest_devid > devid) {
+		fs_devices->lowest_devid = devid;
+	}
+	*fs_devices_ret = fs_devices;
+	return 0;
+}
+
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_fs_devices *seed_devices;
+	struct btrfs_device *device;
+	int ret = 0;
+
+again:
+	if (!fs_devices)
+		return 0;
+	while (!list_empty(&fs_devices->devices)) {
+		device = list_entry(fs_devices->devices.next,
+				    struct btrfs_device, dev_list);
+		list_del(&device->dev_list);
+		/* free the memory */
+		free(device);
+	}
+
+	seed_devices = fs_devices->seed;
+	fs_devices->seed = NULL;
+	if (seed_devices) {
+		struct btrfs_fs_devices *orig;
+
+		orig = fs_devices;
+		fs_devices = seed_devices;
+		list_del(&orig->list);
+		free(orig);
+		goto again;
+	} else {
+		list_del(&fs_devices->list);
+		free(fs_devices);
+	}
+
+	return ret;
+}
+
+void btrfs_close_all_devices(void)
+{
+	struct btrfs_fs_devices *fs_devices;
+
+	while (!list_empty(&fs_uuids)) {
+		fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices,
+					list);
+		btrfs_close_devices(fs_devices);
+	}
+}
+
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_device *device;
+
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (!device->desc || !device->part) {
+			printf("no device found for devid %llu, skip it \n",
+				device->devid);
+			continue;
+		}
+	}
+	return 0;
+}
+
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+			  struct btrfs_fs_devices **fs_devices_ret,
+			  u64 *total_devs)
+{
+	struct btrfs_super_block *disk_super;
+	char buf[BTRFS_SUPER_INFO_SIZE];
+	int ret;
+	u64 devid;
+
+	disk_super = (struct btrfs_super_block *)buf;
+	ret = btrfs_read_dev_super(desc, part, disk_super);
+	if (ret < 0)
+		return -EIO;
+	devid = btrfs_stack_device_id(&disk_super->dev_item);
+	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)
+		*total_devs = 1;
+	else
+		*total_devs = btrfs_super_num_devices(disk_super);
+
+	ret = device_list_add(disk_super, devid, desc, part, fs_devices_ret);
+
+	return ret;
+}
+
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+				       u8 *uuid, u8 *fsid)
+{
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *cur_devices;
+
+	cur_devices = fs_info->fs_devices;
+	while (cur_devices) {
+		if (!fsid ||
+		   !memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
+			device = find_device(cur_devices, devid, uuid);
+			if (device)
+				return device;
+		}
+		cur_devices = cur_devices->seed;
+	}
+	return NULL;
+}
+
+static struct btrfs_device *fill_missing_device(u64 devid)
+{
+	struct btrfs_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	return device;
+}
+
+/*
+ * slot == -1: SYSTEM chunk
+ * return -EIO on error, otherwise return 0
+ */
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *leaf,
+			    struct btrfs_chunk *chunk,
+			    int slot, u64 logical)
+{
+	u64 length;
+	u64 stripe_len;
+	u16 num_stripes;
+	u16 sub_stripes;
+	u64 type;
+	u32 chunk_ondisk_size;
+	u32 sectorsize = fs_info->sectorsize;
+
+	/*
+	 * Basic chunk item size check.  Note that btrfs_chunk already contains
+	 * one stripe, so no "==" check.
+	 */
+	if (slot >= 0 &&
+	    btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
+		error("invalid chunk item size, have %u expect [%zu, %zu)",
+			btrfs_item_size_nr(leaf, slot),
+			sizeof(struct btrfs_chunk),
+			BTRFS_LEAF_DATA_SIZE(fs_info));
+		return -EUCLEAN;
+	}
+	length = btrfs_chunk_length(leaf, chunk);
+	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+	type = btrfs_chunk_type(leaf, chunk);
+
+	if (num_stripes == 0) {
+		error("invalid num_stripes, have %u expect non-zero",
+			num_stripes);
+		return -EUCLEAN;
+	}
+	if (slot >= 0 && btrfs_chunk_item_size(num_stripes) !=
+	    btrfs_item_size_nr(leaf, slot)) {
+		error("invalid chunk item size, have %u expect %lu",
+			btrfs_item_size_nr(leaf, slot),
+			btrfs_chunk_item_size(num_stripes));
+		return -EUCLEAN;
+	}
+
+	/*
+	 * These valid checks may be insufficient to cover every corner cases.
+	 */
+	if (!IS_ALIGNED(logical, sectorsize)) {
+		error("invalid chunk logical %llu",  logical);
+		return -EIO;
+	}
+	if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) {
+		error("invalid chunk sectorsize %llu",
+		      (unsigned long long)btrfs_chunk_sector_size(leaf, chunk));
+		return -EIO;
+	}
+	if (!length || !IS_ALIGNED(length, sectorsize)) {
+		error("invalid chunk length %llu",  length);
+		return -EIO;
+	}
+	if (stripe_len != BTRFS_STRIPE_LEN) {
+		error("invalid chunk stripe length: %llu", stripe_len);
+		return -EIO;
+	}
+	/* Check on chunk item type */
+	if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+		error("invalid chunk type %llu", type);
+		return -EIO;
+	}
+	if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+		     BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+		error("unrecognized chunk type: %llu",
+		      ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+			BTRFS_BLOCK_GROUP_PROFILE_MASK) & type);
+		return -EIO;
+	}
+	if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+		error("missing chunk type flag: %llu", type);
+		return -EIO;
+	}
+	if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) ||
+	      (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) {
+		error("conflicting chunk type detected: %llu", type);
+		return -EIO;
+	}
+	if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+	    !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+		error("conflicting chunk profile detected: %llu", type);
+		return -EIO;
+	}
+
+	chunk_ondisk_size = btrfs_chunk_item_size(num_stripes);
+	/*
+	 * Btrfs_chunk contains at least one stripe, and for sys_chunk
+	 * it can't exceed the system chunk array size
+	 * For normal chunk, it should match its chunk item size.
+	 */
+	if (num_stripes < 1 ||
+	    (slot == -1 && chunk_ondisk_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) ||
+	    (slot >= 0 && chunk_ondisk_size > btrfs_item_size_nr(leaf, slot))) {
+		error("invalid num_stripes: %u", num_stripes);
+		return -EIO;
+	}
+	/*
+	 * Device number check against profile
+	 */
+	if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != 2 ||
+		  !IS_ALIGNED(num_stripes, sub_stripes))) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < 3) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < 4) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+	     num_stripes != 1)) {
+		error("Invalid num_stripes:sub_stripes %u:%u for profile %llu",
+		      num_stripes, sub_stripes,
+		      type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Slot is used to verify the chunk item is valid
+ *
+ * For sys chunk in superblock, pass -1 to indicate sys chunk.
+ */
+static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+			  struct extent_buffer *leaf,
+			  struct btrfs_chunk *chunk, int slot)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct map_lookup *map;
+	struct cache_extent *ce;
+	u64 logical;
+	u64 length;
+	u64 devid;
+	u8 uuid[BTRFS_UUID_SIZE];
+	int num_stripes;
+	int ret;
+	int i;
+
+	logical = key->offset;
+	length = btrfs_chunk_length(leaf, chunk);
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	/* Validation check */
+	ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, slot, logical);
+	if (ret) {
+		error("%s checksums match, but it has an invalid chunk, %s",
+		      (slot == -1) ? "Superblock" : "Metadata",
+		      (slot == -1) ? "try btrfsck --repair -s <superblock> ie, 0,1,2" : "");
+		return ret;
+	}
+
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+
+	/* already mapped? */
+	if (ce && ce->start <= logical && ce->start + ce->size > logical) {
+		return 0;
+	}
+
+	map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map)
+		return -ENOMEM;
+
+	map->ce.start = logical;
+	map->ce.size = length;
+	map->num_stripes = num_stripes;
+	map->io_width = btrfs_chunk_io_width(leaf, chunk);
+	map->io_align = btrfs_chunk_io_align(leaf, chunk);
+	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	map->type = btrfs_chunk_type(leaf, chunk);
+	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+
+	for (i = 0; i < num_stripes; i++) {
+		map->stripes[i].physical =
+			btrfs_stripe_offset_nr(leaf, chunk, i);
+		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+		read_extent_buffer(leaf, uuid, (unsigned long)
+				   btrfs_stripe_dev_uuid_nr(chunk, i),
+				   BTRFS_UUID_SIZE);
+		map->stripes[i].dev = btrfs_find_device(fs_info, devid, uuid,
+							NULL);
+		if (!map->stripes[i].dev) {
+			map->stripes[i].dev = fill_missing_device(devid);
+			printf("warning, device %llu is missing\n",
+			       (unsigned long long)devid);
+			list_add(&map->stripes[i].dev->dev_list,
+				 &fs_info->fs_devices->devices);
+		}
+
+	}
+	ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
+	if (ret < 0) {
+		errno = -ret;
+		error("failed to add chunk map start=%llu len=%llu: %d (%m)",
+		      map->ce.start, map->ce.size, ret);
+	}
+
+	return ret;
+}
+
+static int fill_device_from_item(struct extent_buffer *leaf,
+				 struct btrfs_dev_item *dev_item,
+				 struct btrfs_device *device)
+{
+	unsigned long ptr;
+
+	device->devid = btrfs_device_id(leaf, dev_item);
+	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+	device->type = btrfs_device_type(leaf, dev_item);
+	device->io_align = btrfs_device_io_align(leaf, dev_item);
+	device->io_width = btrfs_device_io_width(leaf, dev_item);
+	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+
+	ptr = (unsigned long)btrfs_device_uuid(dev_item);
+	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
+
+	return 0;
+}
+
+static int read_one_dev(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *leaf,
+			struct btrfs_dev_item *dev_item)
+{
+	struct btrfs_device *device;
+	u64 devid;
+	int ret = 0;
+	u8 fs_uuid[BTRFS_UUID_SIZE];
+	u8 dev_uuid[BTRFS_UUID_SIZE];
+
+	devid = btrfs_device_id(leaf, dev_item);
+	read_extent_buffer(leaf, dev_uuid,
+			   (unsigned long)btrfs_device_uuid(dev_item),
+			   BTRFS_UUID_SIZE);
+	read_extent_buffer(leaf, fs_uuid,
+			   (unsigned long)btrfs_device_fsid(dev_item),
+			   BTRFS_FSID_SIZE);
+
+	if (memcmp(fs_uuid, fs_info->fs_devices->fsid, BTRFS_UUID_SIZE)) {
+		error("Seed device is not yet supported\n");
+		return -ENOTSUPP;
+	}
+
+	device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
+	if (!device) {
+		device = kzalloc(sizeof(*device), GFP_NOFS);
+		if (!device)
+			return -ENOMEM;
+		list_add(&device->dev_list,
+			 &fs_info->fs_devices->devices);
+	}
+
+	fill_device_from_item(leaf, dev_item, device);
+	fs_info->fs_devices->total_rw_bytes +=
+		btrfs_device_total_bytes(leaf, dev_item);
+	return ret;
+}
+
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_super_block *super_copy = fs_info->super_copy;
+	struct extent_buffer *sb;
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_chunk *chunk;
+	u8 *array_ptr;
+	unsigned long sb_array_offset;
+	int ret = 0;
+	u32 num_stripes;
+	u32 array_size;
+	u32 len = 0;
+	u32 cur_offset;
+	struct btrfs_key key;
+
+	if (fs_info->nodesize < BTRFS_SUPER_INFO_SIZE) {
+		printf("ERROR: nodesize %u too small to read superblock\n",
+				fs_info->nodesize);
+		return -EINVAL;
+	}
+	sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET,
+				       BTRFS_SUPER_INFO_SIZE);
+	if (!sb)
+		return -ENOMEM;
+	btrfs_set_buffer_uptodate(sb);
+	write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy));
+	array_size = btrfs_super_sys_array_size(super_copy);
+
+	array_ptr = super_copy->sys_chunk_array;
+	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+	cur_offset = 0;
+
+	while (cur_offset < array_size) {
+		disk_key = (struct btrfs_disk_key *)array_ptr;
+		len = sizeof(*disk_key);
+		if (cur_offset + len > array_size)
+			goto out_short_read;
+
+		btrfs_disk_key_to_cpu(&key, disk_key);
+
+		array_ptr += len;
+		sb_array_offset += len;
+		cur_offset += len;
+
+		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+			chunk = (struct btrfs_chunk *)sb_array_offset;
+			/*
+			 * At least one btrfs_chunk with one stripe must be
+			 * present, exact stripe count check comes afterwards
+			 */
+			len = btrfs_chunk_item_size(1);
+			if (cur_offset + len > array_size)
+				goto out_short_read;
+
+			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+			if (!num_stripes) {
+				printk(
+	    "ERROR: invalid number of stripes %u in sys_array at offset %u\n",
+					num_stripes, cur_offset);
+				ret = -EIO;
+				break;
+			}
+
+			len = btrfs_chunk_item_size(num_stripes);
+			if (cur_offset + len > array_size)
+				goto out_short_read;
+
+			ret = read_one_chunk(fs_info, &key, sb, chunk, -1);
+			if (ret)
+				break;
+		} else {
+			printk(
+		"ERROR: unexpected item type %u in sys_array at offset %u\n",
+				(u32)key.type, cur_offset);
+			ret = -EIO;
+			break;
+		}
+		array_ptr += len;
+		sb_array_offset += len;
+		cur_offset += len;
+	}
+	free_extent_buffer(sb);
+	return ret;
+
+out_short_read:
+	printk("ERROR: sys_array too short to read %u bytes at offset %u\n",
+			len, cur_offset);
+	free_extent_buffer(sb);
+	return -EIO;
+}
+
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_root *root = fs_info->chunk_root;
+	int ret;
+	int slot;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Read all device items, and then all the chunk items. All
+	 * device items are found before any chunk item (their object id
+	 * is smaller than the lowest possible object id for a chunk
+	 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
+	 */
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.offset = 0;
+	key.type = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+	while(1) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto error;
+			break;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+			struct btrfs_dev_item *dev_item;
+			dev_item = btrfs_item_ptr(leaf, slot,
+						  struct btrfs_dev_item);
+			ret = read_one_dev(fs_info, leaf, dev_item);
+			if (ret < 0)
+				goto error;
+		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
+			struct btrfs_chunk *chunk;
+			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+			ret = read_one_chunk(fs_info, &found_key, leaf, chunk,
+					     slot);
+			if (ret < 0)
+				goto error;
+		}
+		path->slots[0]++;
+	}
+
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Get stripe length from chunk item and its stripe items
+ *
+ * Caller should only call this function after validating the chunk item
+ * by using btrfs_check_chunk_valid().
+ */
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *leaf,
+			struct btrfs_chunk *chunk)
+{
+	u64 stripe_len;
+	u64 chunk_len;
+	u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	u64 profile = btrfs_chunk_type(leaf, chunk) &
+		      BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+	chunk_len = btrfs_chunk_length(leaf, chunk);
+
+	switch (profile) {
+	case 0: /* Single profile */
+	case BTRFS_BLOCK_GROUP_RAID1:
+	case BTRFS_BLOCK_GROUP_RAID1C3:
+	case BTRFS_BLOCK_GROUP_RAID1C4:
+	case BTRFS_BLOCK_GROUP_DUP:
+		stripe_len = chunk_len;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID0:
+		stripe_len = chunk_len / num_stripes;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID5:
+		stripe_len = chunk_len / (num_stripes - 1);
+		break;
+	case BTRFS_BLOCK_GROUP_RAID6:
+		stripe_len = chunk_len / (num_stripes - 2);
+		break;
+	case BTRFS_BLOCK_GROUP_RAID10:
+		stripe_len = chunk_len / (num_stripes /
+				btrfs_chunk_sub_stripes(leaf, chunk));
+		break;
+	default:
+		/* Invalid chunk profile found */
+		BUG_ON(1);
+	}
+	return stripe_len;
+}
+
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	int ret;
+
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+	if (!ce) {
+		fprintf(stderr, "No mapping for %llu-%llu\n",
+			(unsigned long long)logical,
+			(unsigned long long)logical+len);
+		return 1;
+	}
+	if (ce->start > logical || ce->start + ce->size < logical) {
+		fprintf(stderr, "Invalid mapping for %llu-%llu, got "
+			"%llu-%llu\n", (unsigned long long)logical,
+			(unsigned long long)logical+len,
+			(unsigned long long)ce->start,
+			(unsigned long long)ce->start + ce->size);
+		return 1;
+	}
+	map = container_of(ce, struct map_lookup, ce);
+
+	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+			 BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4))
+		ret = map->num_stripes;
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+		ret = map->sub_stripes;
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+		ret = 2;
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+		ret = 3;
+	else
+		ret = 1;
+	return ret;
+}
+
+int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
+		  u64 *size, u64 type)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	u64 cur = *logical;
+
+	ce = search_cache_extent(&map_tree->cache_tree, cur);
+
+	while (ce) {
+		/*
+		 * only jump to next bg if our cur is not 0
+		 * As the initial logical for btrfs_next_bg() is 0, and
+		 * if we jump to next bg, we skipped a valid bg.
+		 */
+		if (cur) {
+			ce = next_cache_extent(ce);
+			if (!ce)
+				return -ENOENT;
+		}
+
+		cur = ce->start;
+		map = container_of(ce, struct map_lookup, ce);
+		if (map->type & type) {
+			*logical = ce->start;
+			*size = ce->size;
+			return 0;
+		}
+		if (!cur)
+			ce = next_cache_extent(ce);
+	}
+
+	return -ENOENT;
+}
+
+static inline int parity_smaller(u64 a, u64 b)
+{
+	return a > b;
+}
+
+/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
+static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
+{
+	struct btrfs_bio_stripe s;
+	int i;
+	u64 l;
+	int again = 1;
+
+	while (again) {
+		again = 0;
+		for (i = 0; i < bbio->num_stripes - 1; i++) {
+			if (parity_smaller(raid_map[i], raid_map[i+1])) {
+				s = bbio->stripes[i];
+				l = raid_map[i];
+				bbio->stripes[i] = bbio->stripes[i+1];
+				raid_map[i] = raid_map[i+1];
+				bbio->stripes[i+1] = s;
+				raid_map[i+1] = l;
+				again = 1;
+			}
+		}
+	}
+}
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		      u64 logical, u64 *length, u64 *type,
+		      struct btrfs_multi_bio **multi_ret, int mirror_num,
+		      u64 **raid_map_ret)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct cache_extent *ce;
+	struct map_lookup *map;
+	u64 offset;
+	u64 stripe_offset;
+	u64 *raid_map = NULL;
+	int stripe_nr;
+	int stripes_allocated = 8;
+	int stripes_required = 1;
+	int stripe_index;
+	int i;
+	struct btrfs_multi_bio *multi = NULL;
+
+	if (multi_ret && rw == READ) {
+		stripes_allocated = 1;
+	}
+again:
+	ce = search_cache_extent(&map_tree->cache_tree, logical);
+	if (!ce) {
+		kfree(multi);
+		*length = (u64)-1;
+		return -ENOENT;
+	}
+	if (ce->start > logical) {
+		kfree(multi);
+		*length = ce->start - logical;
+		return -ENOENT;
+	}
+
+	if (multi_ret) {
+		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+				GFP_NOFS);
+		if (!multi)
+			return -ENOMEM;
+	}
+	map = container_of(ce, struct map_lookup, ce);
+	offset = logical - ce->start;
+
+	if (rw == WRITE) {
+		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+				 BTRFS_BLOCK_GROUP_RAID1C3 |
+				 BTRFS_BLOCK_GROUP_RAID1C4 |
+				 BTRFS_BLOCK_GROUP_DUP)) {
+			stripes_required = map->num_stripes;
+		} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+			stripes_required = map->sub_stripes;
+		}
+	}
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
+	    && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
+		    /* RAID[56] write or recovery. Return all stripes */
+		    stripes_required = map->num_stripes;
+
+		    /* Only allocate the map if we've already got a large enough multi_ret */
+		    if (stripes_allocated >= stripes_required) {
+			    raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+			    if (!raid_map) {
+				    kfree(multi);
+				    return -ENOMEM;
+			    }
+		    }
+	}
+
+	/* if our multi bio struct is too small, back off and try again */
+	if (multi_ret && stripes_allocated < stripes_required) {
+		stripes_allocated = stripes_required;
+		kfree(multi);
+		multi = NULL;
+		goto again;
+	}
+	stripe_nr = offset;
+	/*
+	 * stripe_nr counts the total number of stripes we have to stride
+	 * to get to this block
+	 */
+	stripe_nr = stripe_nr / map->stripe_len;
+
+	stripe_offset = stripe_nr * map->stripe_len;
+	BUG_ON(offset < stripe_offset);
+
+	/* stripe_offset is the offset of this block in its stripe*/
+	stripe_offset = offset - stripe_offset;
+
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+			 BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 |
+			 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+			 BTRFS_BLOCK_GROUP_RAID10 |
+			 BTRFS_BLOCK_GROUP_DUP)) {
+		/* we limit the length of each bio to what fits in a stripe */
+		*length = min_t(u64, ce->size - offset,
+			      map->stripe_len - stripe_offset);
+	} else {
+		*length = ce->size - offset;
+	}
+
+	if (!multi_ret)
+		goto out;
+
+	multi->num_stripes = 1;
+	stripe_index = 0;
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+			 BTRFS_BLOCK_GROUP_RAID1C3 |
+			 BTRFS_BLOCK_GROUP_RAID1C4)) {
+		if (rw == WRITE)
+			multi->num_stripes = map->num_stripes;
+		else if (mirror_num)
+			stripe_index = mirror_num - 1;
+		else
+			stripe_index = stripe_nr % map->num_stripes;
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+		int factor = map->num_stripes / map->sub_stripes;
+
+		stripe_index = stripe_nr % factor;
+		stripe_index *= map->sub_stripes;
+
+		if (rw == WRITE)
+			multi->num_stripes = map->sub_stripes;
+		else if (mirror_num)
+			stripe_index += mirror_num - 1;
+
+		stripe_nr = stripe_nr / factor;
+	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+		if (rw == WRITE)
+			multi->num_stripes = map->num_stripes;
+		else if (mirror_num)
+			stripe_index = mirror_num - 1;
+	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+				BTRFS_BLOCK_GROUP_RAID6)) {
+
+		if (raid_map) {
+			int rot;
+			u64 tmp;
+			u64 raid56_full_stripe_start;
+			u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
+
+			/*
+			 * align the start of our data stripe in the logical
+			 * address space
+			 */
+			raid56_full_stripe_start = offset / full_stripe_len;
+			raid56_full_stripe_start *= full_stripe_len;
+
+			/* get the data stripe number */
+			stripe_nr = raid56_full_stripe_start / map->stripe_len;
+			stripe_nr = stripe_nr / nr_data_stripes(map);
+
+			/* Work out the disk rotation on this stripe-set */
+			rot = stripe_nr % map->num_stripes;
+
+			/* Fill in the logical address of each stripe */
+			tmp = stripe_nr * nr_data_stripes(map);
+
+			for (i = 0; i < nr_data_stripes(map); i++)
+				raid_map[(i+rot) % map->num_stripes] =
+					ce->start + (tmp + i) * map->stripe_len;
+
+			raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
+			if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+				raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
+
+			*length = map->stripe_len;
+			stripe_index = 0;
+			stripe_offset = 0;
+			multi->num_stripes = map->num_stripes;
+		} else {
+			stripe_index = stripe_nr % nr_data_stripes(map);
+			stripe_nr = stripe_nr / nr_data_stripes(map);
+
+			/*
+			 * Mirror #0 or #1 means the original data block.
+			 * Mirror #2 is RAID5 parity block.
+			 * Mirror #3 is RAID6 Q block.
+			 */
+			if (mirror_num > 1)
+				stripe_index = nr_data_stripes(map) + mirror_num - 2;
+
+			/* We distribute the parity blocks across stripes */
+			stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
+		}
+	} else {
+		/*
+		 * after this do_div call, stripe_nr is the number of stripes
+		 * on this device we have to walk to find the data, and
+		 * stripe_index is the number of our device in the stripe array
+		 */
+		stripe_index = stripe_nr % map->num_stripes;
+		stripe_nr = stripe_nr / map->num_stripes;
+	}
+	BUG_ON(stripe_index >= map->num_stripes);
+
+	for (i = 0; i < multi->num_stripes; i++) {
+		multi->stripes[i].physical =
+			map->stripes[stripe_index].physical + stripe_offset +
+			stripe_nr * map->stripe_len;
+		multi->stripes[i].dev = map->stripes[stripe_index].dev;
+		stripe_index++;
+	}
+	*multi_ret = multi;
+
+	if (type)
+		*type = map->type;
+
+	if (raid_map) {
+		sort_parity_stripes(multi, raid_map);
+		*raid_map_ret = raid_map;
+	}
+out:
+	return 0;
+}
+
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		    u64 logical, u64 *length,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num,
+		    u64 **raid_map_ret)
+{
+	return __btrfs_map_block(fs_info, rw, logical, length, NULL,
+				 multi_ret, mirror_num, raid_map_ret);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
new file mode 100644
index 00000000000..9d1a07ae789
--- /dev/null
+++ b/fs/btrfs/volumes.h
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_VOLUMES_H__
+#define __BTRFS_VOLUMES_H__
+
+#include <fs_internal.h>
+#include "ctree.h"
+
+#define BTRFS_STRIPE_LEN	SZ_64K
+
+struct btrfs_device {
+	struct list_head dev_list;
+	struct btrfs_root *dev_root;
+	struct btrfs_fs_devices *fs_devices;
+
+	struct blk_desc *desc;
+	struct disk_partition *part;
+
+	u64 total_devs;
+	u64 super_bytes_used;
+
+	u64 generation;
+
+	/* the internal btrfs device id */
+	u64 devid;
+
+	/* size of the device */
+	u64 total_bytes;
+
+	/* bytes used */
+	u64 bytes_used;
+
+	/* optimal io alignment for this device */
+	u32 io_align;
+
+	/* optimal io width for this device */
+	u32 io_width;
+
+	/* minimal io size for this device */
+	u32 sector_size;
+
+	/* type and info about this device */
+	u64 type;
+
+	/* physical drive uuid (or lvm uuid) */
+	u8 uuid[BTRFS_UUID_SIZE];
+};
+
+struct btrfs_fs_devices {
+	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+	u8 metadata_uuid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+
+	u64 latest_devid;
+	u64 lowest_devid;
+	u64 latest_trans;
+
+	u64 total_rw_bytes;
+
+	struct list_head devices;
+	struct list_head list;
+
+	int seeding;
+	struct btrfs_fs_devices *seed;
+};
+
+struct btrfs_bio_stripe {
+	struct btrfs_device *dev;
+	u64 physical;
+};
+
+struct btrfs_multi_bio {
+	int error;
+	int num_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
+struct map_lookup {
+	struct cache_extent ce;
+	u64 type;
+	int io_align;
+	int io_width;
+	int stripe_len;
+	int sector_size;
+	int num_stripes;
+	int sub_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
+struct btrfs_raid_attr {
+	int sub_stripes;	/* sub_stripes info for map */
+	int dev_stripes;	/* stripes per dev */
+	int devs_max;		/* max devs to use */
+	int devs_min;		/* min devs needed */
+	int tolerated_failures; /* max tolerated fail devs */
+	int devs_increment;	/* ndevs has to be a multiple of this */
+	int ncopies;		/* how many copies to data has */
+	int nparity;		/* number of stripes worth of bytes to store
+				 * parity information */
+	const char raid_name[8]; /* name of the raid */
+	u64 bg_flag;		/* block group flag of the raid */
+};
+
+extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
+
+static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
+{
+	if (flags & BTRFS_BLOCK_GROUP_RAID10)
+		return BTRFS_RAID_RAID10;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+		return BTRFS_RAID_RAID1;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+		return BTRFS_RAID_RAID1C3;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+		return BTRFS_RAID_RAID1C4;
+	else if (flags & BTRFS_BLOCK_GROUP_DUP)
+		return BTRFS_RAID_DUP;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+		return BTRFS_RAID_RAID0;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+		return BTRFS_RAID_RAID5;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+		return BTRFS_RAID_RAID6;
+
+	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
+#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
+			    (sizeof(struct btrfs_bio_stripe) * (n)))
+#define btrfs_map_lookup_size(n) (sizeof(struct map_lookup) + \
+				 (sizeof(struct btrfs_bio_stripe) * (n)))
+
+#define BTRFS_RAID5_P_STRIPE ((u64)-2)
+#define BTRFS_RAID6_Q_STRIPE ((u64)-1)
+
+static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
+{
+	u64 stripe_size;
+
+	if (type & BTRFS_BLOCK_GROUP_RAID0) {
+		stripe_size = length;
+		stripe_size /= num_stripes;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
+		stripe_size = length * 2;
+		stripe_size /= num_stripes;
+	} else if (type & BTRFS_BLOCK_GROUP_RAID5) {
+		stripe_size = length;
+		stripe_size /= (num_stripes - 1);
+	} else if (type & BTRFS_BLOCK_GROUP_RAID6) {
+		stripe_size = length;
+		stripe_size /= (num_stripes - 2);
+	} else {
+		stripe_size = length;
+	}
+	return stripe_size;
+}
+
+#ifndef READ
+#define READ 0
+#define WRITE 1
+#define READA 2
+#endif
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		      u64 logical, u64 *length, u64 *type,
+		      struct btrfs_multi_bio **multi_ret, int mirror_num,
+		      u64 **raid_map);
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+		    u64 logical, u64 *length,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num,
+		    u64 **raid_map_ret);
+int btrfs_next_bg(struct btrfs_fs_info *map_tree, u64 *logical,
+		     u64 *size, u64 type);
+static inline int btrfs_next_bg_metadata(struct btrfs_fs_info *fs_info,
+					 u64 *logical, u64 *size)
+{
+	return btrfs_next_bg(fs_info, logical, size,
+			BTRFS_BLOCK_GROUP_METADATA);
+}
+static inline int btrfs_next_bg_system(struct btrfs_fs_info *fs_info,
+				       u64 *logical, u64 *size)
+{
+	return btrfs_next_bg(fs_info, logical, size,
+			BTRFS_BLOCK_GROUP_SYSTEM);
+}
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
+void btrfs_close_all_devices(void);
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+			  struct btrfs_fs_devices **fs_devices_ret,
+			  u64 *total_devs);
+struct list_head *btrfs_scanned_uuids(void);
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+				       u8 *uuid, u8 *fsid);
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *leaf,
+			    struct btrfs_chunk *chunk,
+			    int slot, u64 logical);
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *leaf,
+			struct btrfs_chunk *chunk);
+#endif
author	Tom Rini	2020-09-07 21:00:47 -0400
committer	Tom Rini	2020-09-07 21:00:47 -0400
commit	2a9f9d633d2e069e5d5e7acde050b338ec803692 (patch)
tree	40c97310da83790361c498e158ee3814e8e140a7
parent	314b9b4a38befd120ce1c566d9eea8e0ec9d8336 (diff)
parent	b737c822c0c4f1210568f61e743236bb980ca645 (diff)