diff options
author | Linus Torvalds | 2018-01-29 13:33:53 -0800 |
---|---|---|
committer | Linus Torvalds | 2018-01-29 13:33:53 -0800 |
commit | a4b7fd7d34de5765dece2dd08060d2e1f7be3b39 (patch) | |
tree | 891c72db66e49054dde6dcf6d710afde25775d73 /fs/ext4 | |
parent | d1de762e36375e1e1cd41f7f93c298ac62d831a7 (diff) | |
parent | f02a9ad1f15daf4378afeda025a53455f72645dd (diff) |
Merge tag 'iversion-v4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux
Pull inode->i_version rework from Jeff Layton:
"This pile of patches is a rework of the inode->i_version field. We
have traditionally incremented that field on every inode data or
metadata change. Typically this increment needs to be logged on disk
even when nothing else has changed, which is rather expensive.
It turns out though that none of the consumers of that field actually
require this behavior. The only real requirement for all of them is
that it be different iff the inode has changed since the last time the
field was checked.
Given that, we can optimize away most of the i_version increments and
avoid dirtying inode metadata when the only change is to the i_version
and no one is querying it. Queries of the i_version field are rather
rare, so we can help write performance under many common workloads.
This patch series converts existing accesses of the i_version field to
a new API, and then converts all of the in-kernel filesystems to use
it. The last patch in the series then converts the backend
implementation to a scheme that optimizes away a large portion of the
metadata updates when no one is looking at it.
In my own testing this series significantly helps performance with
small I/O sizes. I also got this email for Christmas this year from
the kernel test robot (a 244% r/w bandwidth improvement with XFS over
DAX, with 4k writes):
https://lkml.org/lkml/2017/12/25/8
A few of the earlier patches in this pile are also flowing to you via
other trees (mm, integrity, and nfsd trees in particular)".
* tag 'iversion-v4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux: (22 commits)
fs: handle inode->i_version more efficiently
btrfs: only dirty the inode in btrfs_update_time if something was changed
xfs: avoid setting XFS_ILOG_CORE if i_version doesn't need incrementing
fs: only set S_VERSION when updating times if necessary
IMA: switch IMA over to new i_version API
xfs: convert to new i_version API
ufs: use new i_version API
ocfs2: convert to new i_version API
nfsd: convert to new i_version API
nfs: convert to new i_version API
ext4: convert to new i_version API
ext2: convert to new i_version API
exofs: switch to new i_version API
btrfs: convert to new i_version API
afs: convert to new i_version API
affs: convert to new i_version API
fat: convert to new i_version API
fs: don't take the i_lock in inode_inc_iversion
fs: new API for handling inode->i_version
ntfs: remove i_version handling
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/dir.c | 9 | ||||
-rw-r--r-- | fs/ext4/inline.c | 7 | ||||
-rw-r--r-- | fs/ext4/inode.c | 13 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 3 | ||||
-rw-r--r-- | fs/ext4/namei.c | 5 | ||||
-rw-r--r-- | fs/ext4/super.c | 3 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 5 |
7 files changed, 28 insertions, 17 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d5babc9f222b..afda0a0499ce 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/slab.h> +#include <linux/iversion.h> #include "ext4.h" #include "xattr.h" @@ -208,7 +209,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (file->f_version != inode->i_version) { + if (inode_cmp_iversion(inode, file->f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -227,7 +228,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); } while (ctx->pos < inode->i_size @@ -568,10 +569,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. */ if ((!info->curr_node) || - (file->f_version != inode->i_version)) { + inode_cmp_iversion(inode, file->f_version)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); ret = ext4_htree_fill_tree(file, info->curr_hash, info->curr_minor_hash, &info->next_hash); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1367553c43bb..a8b987b71173 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -14,6 +14,7 @@ #include <linux/iomap.h> #include <linux/fiemap.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -1042,7 +1043,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, */ dir->i_mtime = dir->i_ctime = current_time(dir); ext4_update_dx_flag(dir); - dir->i_version++; + inode_inc_iversion(dir); return 1; } @@ -1494,7 +1495,7 @@ int ext4_read_inline_dir(struct file *file, * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (file->f_version != inode->i_version) { + if (inode_cmp_iversion(inode, file->f_version)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and @@ -1526,7 +1527,7 @@ int ext4_read_inline_dir(struct file *file, } offset = i; ctx->pos = offset; - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); } while (ctx->pos < extra_size) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 534a9130f625..0eff5b761c6e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -39,6 +39,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/iomap.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -4882,12 +4883,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - inode->i_version = le32_to_cpu(raw_inode->i_disk_version); + u64 ivers = le32_to_cpu(raw_inode->i_disk_version); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - inode->i_version |= + ivers |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } + inode_set_iversion_queried(inode, ivers); } ret = 0; @@ -5173,11 +5176,13 @@ static int ext4_do_update_inode(handle_t *handle, } if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - raw_inode->i_disk_version = cpu_to_le32(inode->i_version); + u64 ivers = inode_peek_iversion(inode); + + raw_inode->i_disk_version = cpu_to_le32(ivers); if (ei->i_extra_isize) { if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) raw_inode->i_version_hi = - cpu_to_le32(inode->i_version >> 32); + cpu_to_le32(ivers >> 32); raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1eec25014f62..7e99ad02f1ba 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -19,6 +19,7 @@ #include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/delay.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "ext4.h" #include <linux/fsmap.h> @@ -144,7 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb, i_gid_write(inode_bl, 0); inode_bl->i_flags = 0; ei_bl->i_flags = 0; - inode_bl->i_version = 1; + inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e750d68fbcb5..6660686e505a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -34,6 +34,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> +#include <linux/iversion.h> #include "ext4.h" #include "ext4_jbd2.h" @@ -2959,7 +2960,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) "empty directory '%.*s' has too many links (%u)", dentry->d_name.len, dentry->d_name.name, inode->i_nlink); - inode->i_version++; + inode_inc_iversion(inode); clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is * zero will ensure that the right thing happens during any @@ -3365,7 +3366,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, ent->de->inode = cpu_to_le32(ino); if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; - ent->dir->i_version++; + inode_inc_iversion(ent->dir); ent->dir->i_ctime = ent->dir->i_mtime = current_time(ent->dir); ext4_mark_inode_dirty(handle, ent->dir); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7c46693a14d7..5de959fb0244 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -40,6 +40,7 @@ #include <linux/dax.h> #include <linux/cleancache.h> #include <linux/uaccess.h> +#include <linux/iversion.h> #include <linux/kthread.h> #include <linux/freezer.h> @@ -967,7 +968,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) if (!ei) return NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 218a7ba57819..63656dbafdc4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -56,6 +56,7 @@ #include <linux/slab.h> #include <linux/mbcache.h> #include <linux/quotaops.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -294,13 +295,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) { return ((u64)ea_inode->i_ctime.tv_sec << 32) | - ((u32)ea_inode->i_version); + (u32) inode_peek_iversion_raw(ea_inode); } static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) { ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32); - ea_inode->i_version = (u32)ref_count; + inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff); } static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode) |