aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/buffer.c8
-rw-r--r--fs/crypto/policy.c1
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/super.c16
-rw-r--r--fs/ext2/xattr.c48
-rw-r--r--fs/ext4/acl.c21
-rw-r--r--fs/ext4/ext4.h63
-rw-r--r--fs/ext4/ext4_jbd2.h23
-rw-r--r--fs/ext4/extents.c3
-rw-r--r--fs/ext4/file.c7
-rw-r--r--fs/ext4/fsmap.c4
-rw-r--r--fs/ext4/ialloc.c76
-rw-r--r--fs/ext4/indirect.c3
-rw-r--r--fs/ext4/inline.c2
-rw-r--r--fs/ext4/inode.c92
-rw-r--r--fs/ext4/ioctl.c10
-rw-r--r--fs/ext4/mballoc.c145
-rw-r--r--fs/ext4/mballoc.h6
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/namei.c131
-rw-r--r--fs/ext4/super.c109
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/ext4/xattr.c1699
-rw-r--r--fs/ext4/xattr.h35
-rw-r--r--fs/mbcache.c52
-rw-r--r--fs/quota/dquot.c16
27 files changed, 2071 insertions, 507 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 5234b15377c2..233e2983c5db 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3031,11 +3031,11 @@ EXPORT_SYMBOL(block_write_full_page);
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
get_block_t *get_block)
{
- struct buffer_head tmp;
struct inode *inode = mapping->host;
- tmp.b_state = 0;
- tmp.b_blocknr = 0;
- tmp.b_size = i_blocksize(inode);
+ struct buffer_head tmp = {
+ .b_size = i_blocksize(inode),
+ };
+
get_block(inode, block, &tmp, 0);
return tmp.b_blocknr;
}
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 9914d51dff86..ce07a86200f3 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -256,6 +256,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+ BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);
if (res)
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 03f5ce1d3dbe..23ebb92484c6 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -113,7 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9c2028b50e5c..7b1bc9059863 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -147,9 +147,9 @@ static void ext2_put_super (struct super_block * sb)
ext2_quota_off_umount(sb);
- if (sbi->s_mb_cache) {
- ext2_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_block_cache) {
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1131,9 +1131,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
#ifdef CONFIG_EXT2_FS_XATTR
- sbi->s_mb_cache = ext2_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ sbi->s_ea_block_cache = ext2_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
goto failed_mount3;
}
#endif
@@ -1182,8 +1182,8 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
- if (sbi->s_mb_cache)
- ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ if (sbi->s_ea_block_cache)
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fbdb8f171893..1b9b1268d418 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -121,6 +121,8 @@ const struct xattr_handler *ext2_xattr_handlers[] = {
NULL
};
+#define EA_BLOCK_CACHE(inode) (EXT2_SB(inode->i_sb)->s_ea_block_cache)
+
static inline const struct xattr_handler *
ext2_xattr_handler(int name_index)
{
@@ -150,7 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -195,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -208,7 +210,7 @@ found:
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -246,7 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -281,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -493,8 +495,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
* This must happen under buffer lock for
* ext2_xattr_set2() to reliably detect modified block
*/
- mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
- hash, bh->b_blocknr);
+ mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
+ bh->b_blocknr);
/* keep the buffer locked while modifying it. */
} else {
@@ -627,7 +629,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -655,7 +657,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+ ext2_xattr_cache_insert(ea_block_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -676,7 +678,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+ ext2_xattr_cache_insert(ea_block_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -721,8 +723,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
* This must happen under buffer lock for
* ext2_xattr_set2() to reliably detect freed block
*/
- mb_cache_entry_delete_block(ext2_mb_cache,
- hash, old_bh->b_blocknr);
+ mb_cache_entry_delete(ea_block_cache, hash,
+ old_bh->b_blocknr);
/* Free the old block. */
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
@@ -795,8 +797,8 @@ ext2_xattr_delete_inode(struct inode *inode)
* This must happen under buffer lock for ext2_xattr_set2() to
* reliably detect freed block
*/
- mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
- hash, bh->b_blocknr);
+ mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
+ bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
@@ -897,21 +899,21 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
+ ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
- bh = sb_bread(inode->i_sb, ce->e_block);
+ bh = sb_bread(inode->i_sb, ce->e_value);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
"inode %ld: block %ld read error",
- inode->i_ino, (unsigned long) ce->e_block);
+ inode->i_ino, (unsigned long) ce->e_value);
} else {
lock_buffer(bh);
/*
@@ -924,27 +926,27 @@ again:
* entry is still hashed is reliable.
*/
if (hlist_bl_unhashed(&ce->e_hash_list)) {
- mb_cache_entry_put(ext2_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
unlock_buffer(bh);
brelse(bh);
goto again;
} else if (le32_to_cpu(HDR(bh)->h_refcount) >
EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
- (unsigned long) ce->e_block,
+ (unsigned long) ce->e_value,
le32_to_cpu(HDR(bh)->h_refcount),
EXT2_XATTR_REFCOUNT_MAX);
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_touch(ext2_mb_cache, ce);
- mb_cache_entry_put(ext2_mb_cache, ce);
+ mb_cache_entry_touch(ea_block_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
+ ce = mb_cache_entry_find_next(ea_block_cache, ce);
}
return NULL;
}
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3ec0e46de95f..09441ae07a5b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type)
*/
static int
__ext4_set_acl(handle_t *handle, struct inode *inode, int type,
- struct posix_acl *acl)
+ struct posix_acl *acl, int xattr_flags)
{
int name_index;
void *value = NULL;
@@ -218,7 +218,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
error = ext4_xattr_set_handle(handle, inode, name_index, "",
- value, size, 0);
+ value, size, xattr_flags);
kfree(value);
if (!error)
@@ -231,18 +231,23 @@ int
ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
handle_t *handle;
- int error, retries = 0;
+ int error, credits, retries = 0;
+ size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0;
error = dquot_initialize(inode);
if (error)
return error;
retry:
- handle = ext4_journal_start(inode, EXT4_HT_XATTR,
- ext4_jbd2_credits_xattr(inode));
+ error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */,
+ &credits);
+ if (error)
+ return error;
+
+ handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
- error = __ext4_set_acl(handle, inode, type, acl);
+ error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
ext4_journal_stop(handle);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -267,13 +272,13 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
if (default_acl) {
error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
- default_acl);
+ default_acl, XATTR_CREATE);
posix_acl_release(default_acl);
}
if (acl) {
if (!error)
error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
- acl);
+ acl, XATTR_CREATE);
posix_acl_release(acl);
}
return error;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 32191548abed..9ebde0cd632e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1114,6 +1114,7 @@ struct ext4_inode_info {
/*
* Mount flags set via mount options or defaults
*/
+#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -1444,6 +1445,8 @@ struct ext4_sb_info {
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
unsigned int s_mb_free_pending;
+ struct list_head s_freed_data_list; /* List of blocks to be freed
+ after commit completed */
/* tunables */
unsigned long s_stripe;
@@ -1516,7 +1519,8 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
+ struct mb_cache *s_ea_inode_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
@@ -1797,10 +1801,12 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2098,6 +2104,12 @@ static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
}
+static inline bool ext4_is_quota_file(struct inode *inode)
+{
+ return IS_NOQUOTA(inode) &&
+ !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL);
+}
+
/*
* This structure is stuffed into the struct file's private_data field
* for directories. It is where we put information so that we can do
@@ -2126,6 +2138,16 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
*/
#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
+/* htree levels for ext4 */
+#define EXT4_HTREE_LEVEL_COMPAT 2
+#define EXT4_HTREE_LEVEL 3
+
+static inline int ext4_dir_htree_level(struct super_block *sb)
+{
+ return ext4_has_feature_largedir(sb) ?
+ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
+}
+
/*
* Timeout and state flag for lazy initialization inode thread.
*/
@@ -2389,16 +2411,17 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
- uid_t *owner, int handle_type,
- unsigned int line_no, int nblocks);
+ uid_t *owner, __u32 i_flags,
+ int handle_type, unsigned int line_no,
+ int nblocks);
-#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
- 0, 0, 0)
+ i_flags, 0, 0, 0)
#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
type, nblocks) \
__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
- (type), __LINE__, (nblocks))
+ 0, (type), __LINE__, (nblocks))
extern void ext4_free_inode(handle_t *, struct inode *);
@@ -2433,6 +2456,7 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
/* inode.c */
int ext4_inode_is_fast_symlink(struct inode *inode);
@@ -2704,19 +2728,20 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
extern int ext4_register_li_request(struct super_block *sb,
ext4_group_t first_not_zeroed);
-static inline int ext4_has_group_desc_csum(struct super_block *sb)
-{
- return ext4_has_feature_gdt_csum(sb) ||
- EXT4_SB(sb)->s_chksum_driver != NULL;
-}
-
static inline int ext4_has_metadata_csum(struct super_block *sb)
{
WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
!EXT4_SB(sb)->s_chksum_driver);
- return (EXT4_SB(sb)->s_chksum_driver != NULL);
+ return ext4_has_feature_metadata_csum(sb) &&
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}
+
+static inline int ext4_has_group_desc_csum(struct super_block *sb)
+{
+ return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
+}
+
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2756,13 +2781,15 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}
-static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
+static inline loff_t ext4_isize(struct super_block *sb,
+ struct ext4_inode *raw_inode)
{
- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
+ if (ext4_has_feature_largedir(sb) ||
+ S_ISREG(le16_to_cpu(raw_inode->i_mode)))
return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
le32_to_cpu(raw_inode->i_size_lo);
- else
- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
+
+ return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
}
static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f97611171023..dabad1bc8617 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -77,7 +77,14 @@
#define EXT4_RESERVE_TRANS_BLOCKS 12U
-#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
+/*
+ * Number of credits needed if we need to insert an entry into a
+ * directory. For each new index block, we need 4 blocks (old index
+ * block, new index block, bitmap block, bg summary). For normal
+ * htree directories there are 2 levels; if the largedir feature
+ * enabled it's 3 levels.
+ */
+#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 12U
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
@@ -104,20 +111,6 @@
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
-static inline int ext4_jbd2_credits_xattr(struct inode *inode)
-{
- int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
-
- /*
- * In case of inline data, we may push out the data to a block,
- * so we need to reserve credits for this eventuality
- */
- if (ext4_has_inline_data(inode))
- credits += ext4_writepage_trans_blocks(inode) + 1;
- return credits;
-}
-
-
/*
* Ext4 handle operation types -- for logging purposes
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3e36508610b7..e0a8425ff74d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2488,7 +2488,8 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
static inline int get_default_free_blocks_flags(struct inode *inode)
{
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
else if (ext4_should_journal_data(inode))
return EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 58e2eeaa0bc4..58294c9a7e1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -364,13 +364,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- if (ext4_encrypted_inode(inode)) {
- int err = fscrypt_get_encryption_info(inode);
- if (err)
- return 0;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index b19436098837..7ec340898598 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -480,6 +480,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start_fsb;
ext4_fsblk_t end_fsb;
+ ext4_fsblk_t bofs;
ext4_fsblk_t eofs;
ext4_group_t start_ag;
ext4_group_t end_ag;
@@ -487,9 +488,12 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
ext4_grpblk_t last_cluster;
int error = 0;
+ bofs = le32_to_cpu(sbi->s_es->s_first_data_block);
eofs = ext4_blocks_count(sbi->s_es);
if (keys[0].fmr_physical >= eofs)
return 0;
+ else if (keys[0].fmr_physical < bofs)
+ keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
start_fsb = keys[0].fmr_physical;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 98ac2f1f23b3..507bfb3344d4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well.
*/
dquot_initialize(inode);
- ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
@@ -743,8 +742,9 @@ out:
*/
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
- __u32 goal, uid_t *owner, int handle_type,
- unsigned int line_no, int nblocks)
+ __u32 goal, uid_t *owner, __u32 i_flags,
+ int handle_type, unsigned int line_no,
+ int nblocks)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -766,30 +766,69 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ sb = dir->i_sb;
+ sbi = EXT4_SB(sb);
+
+ if (unlikely(ext4_forced_shutdown(sbi)))
return ERR_PTR(-EIO);
- if ((ext4_encrypted_inode(dir) ||
- DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
- (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+ if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
+ !(i_flags & EXT4_EA_INODE_FL)) {
err = fscrypt_get_encryption_info(dir);
if (err)
return ERR_PTR(err);
if (!fscrypt_has_encryption_key(dir))
return ERR_PTR(-ENOKEY);
- if (!handle)
- nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
}
- sb = dir->i_sb;
+ if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) {
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+ struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
+
+ if (p) {
+ int acl_size = p->a_count * sizeof(ext4_acl_entry);
+
+ nblocks += (S_ISDIR(mode) ? 2 : 1) *
+ __ext4_xattr_set_credits(sb, NULL /* inode */,
+ NULL /* block_bh */, acl_size,
+ true /* is_create */);
+ posix_acl_release(p);
+ }
+#endif
+
+#ifdef CONFIG_SECURITY
+ {
+ int num_security_xattrs = 1;
+
+#ifdef CONFIG_INTEGRITY
+ num_security_xattrs++;
+#endif
+ /*
+ * We assume that security xattrs are never
+ * more than 1k. In practice they are under
+ * 128 bytes.
+ */
+ nblocks += num_security_xattrs *
+ __ext4_xattr_set_credits(sb, NULL /* inode */,
+ NULL /* block_bh */, 1024,
+ true /* is_create */);
+ }
+#endif
+ if (encrypt)
+ nblocks += __ext4_xattr_set_credits(sb,
+ NULL /* inode */, NULL /* block_bh */,
+ FSCRYPT_SET_CONTEXT_MAX_SIZE,
+ true /* is_create */);
+ }
+
ngroups = ext4_get_groups_count(sb);
trace_ext4_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode);
- sbi = EXT4_SB(sb);
/*
* Initialize owners and quota early so that we don't have to account
@@ -1053,6 +1092,7 @@ got:
/* Don't inherit extent flag from directory, amongst others. */
ei->i_flags =
ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+ ei->i_flags |= i_flags;
ei->i_file_acl = 0;
ei->i_dtime = 0;
ei->i_block_group = group;
@@ -1109,13 +1149,15 @@ got:
goto fail_free_drop;
}
- err = ext4_init_acl(handle, inode, dir);
- if (err)
- goto fail_free_drop;
+ if (!(ei->i_flags & EXT4_EA_INODE_FL)) {
+ err = ext4_init_acl(handle, inode, dir);
+ if (err)
+ goto fail_free_drop;
- err = ext4_init_security(handle, inode, dir, qstr);
- if (err)
- goto fail_free_drop;
+ err = ext4_init_security(handle, inode, dir, qstr);
+ if (err)
+ goto fail_free_drop;
+ }
if (ext4_has_feature_extents(sb)) {
/* set extent flag only for directory, file and normal symlink*/
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bc15c2c17633..7ffa290cbb8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -829,7 +829,8 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
int flags = EXT4_FREE_BLOCKS_VALIDATED;
int err;
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8d141c0c8ff9..28c5c3abddb3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
- if (!entry->e_value_block && entry->e_value_size) {
+ if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cf82d03968c..3c600f02673f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -144,16 +144,12 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
/*
* Test whether an inode is a fast symlink.
+ * A fast symlink has its symlink data stored in ext4_inode_info->i_data.
*/
int ext4_inode_is_fast_symlink(struct inode *inode)
{
- int ea_blocks = EXT4_I(inode)->i_file_acl ?
- EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
-
- if (ext4_has_inline_data(inode))
- return 0;
-
- return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+ return S_ISLNK(inode->i_mode) && inode->i_size &&
+ (inode->i_size < EXT4_N_BLOCKS * 4);
}
/*
@@ -189,6 +185,8 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
+ int extra_credits = 3;
+ struct ext4_xattr_inode_array *ea_inode_array = NULL;
trace_ext4_evict_inode(inode);
@@ -213,7 +211,8 @@ void ext4_evict_inode(struct inode *inode)
*/
if (inode->i_ino != EXT4_JOURNAL_INO &&
ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+ inode->i_data.nrpages) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
@@ -238,8 +237,12 @@ void ext4_evict_inode(struct inode *inode)
* protection against it
*/
sb_start_intwrite(inode->i_sb);
+
+ if (!IS_NOQUOTA(inode))
+ extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
+
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
- ext4_blocks_for_truncate(inode)+3);
+ ext4_blocks_for_truncate(inode)+extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
@@ -254,6 +257,16 @@ void ext4_evict_inode(struct inode *inode)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+
+ /*
+ * Set inode->i_size to 0 before calling ext4_truncate(). We need
+ * special handling of symlinks here because i_size is used to
+ * determine whether ext4_inode_info->i_data contains symlink data or
+ * block mappings. Setting i_size to 0 will remove its fast symlink
+ * status. Erase i_data so that it becomes a valid empty block map.
+ */
+ if (ext4_inode_is_fast_symlink(inode))
+ memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -271,25 +284,17 @@ void ext4_evict_inode(struct inode *inode)
}
}
- /*
- * ext4_ext_truncate() doesn't reserve any slop when it
- * restarts journal transactions; therefore there may not be
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
- if (!ext4_handle_has_enough_credits(handle, 3)) {
- err = ext4_journal_extend(handle, 3);
- if (err > 0)
- err = ext4_journal_restart(handle, 3);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
- stop_handle:
- ext4_journal_stop(handle);
- ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
- goto no_delete;
- }
+ /* Remove xattr references. */
+ err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
+ extra_credits);
+ if (err) {
+ ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
+stop_handle:
+ ext4_journal_stop(handle);
+ ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
+ goto no_delete;
}
/*
@@ -317,6 +322,7 @@ void ext4_evict_inode(struct inode *inode)
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -710,7 +716,7 @@ out_sem:
if (map->m_flags & EXT4_MAP_NEW &&
!(map->m_flags & EXT4_MAP_UNWRITTEN) &&
!(flags & EXT4_GET_BLOCKS_ZERO) &&
- !IS_NOQUOTA(inode) &&
+ !ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
ret = ext4_jbd2_inode_add_wait(handle, inode);
@@ -4712,7 +4718,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (ext4_has_feature_64bit(sb))
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
- inode->i_size = ext4_isize(raw_inode);
+ inode->i_size = ext4_isize(sb, raw_inode);
if ((size = i_size_read(inode)) < 0) {
EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
@@ -4846,6 +4852,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
brelse(iloc.bh);
ext4_set_inode_flags(inode);
+
+ if (ei->i_flags & EXT4_EA_INODE_FL) {
+ ext4_xattr_inode_set_class(inode);
+
+ inode_lock(inode);
+ inode->i_flags |= S_NOQUOTA;
+ inode_unlock(inode);
+ }
+
unlock_new_inode(inode);
return inode;
@@ -5037,7 +5052,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- if (ei->i_disksize != ext4_isize(raw_inode)) {
+ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
ext4_isize_set(raw_inode, ei->i_disksize);
need_datasync = 1;
}
@@ -5287,7 +5302,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = PTR_ERR(handle);
goto err_out;
}
+
+ /* dquot_transfer() calls back ext4_get_inode_usage() which
+ * counts xattr inode references.
+ */
+ down_read(&EXT4_I(inode)->xattr_sem);
error = dquot_transfer(inode, attr);
+ up_read(&EXT4_I(inode)->xattr_sem);
+
if (error) {
ext4_journal_stop(handle);
return error;
@@ -5307,6 +5329,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
+ if (ext4_encrypted_inode(inode)) {
+ error = fscrypt_get_encryption_info(inode);
+ if (error)
+ return error;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
+
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0c21e22acd74..42b3a73143cf 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -218,7 +218,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
unsigned int jflag;
/* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
+ if (ext4_is_quota_file(inode))
goto flags_out;
oldflags = ei->i_flags;
@@ -342,7 +342,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
err = -EPERM;
inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
+ if (ext4_is_quota_file(inode))
goto out_unlock;
err = ext4_get_inode_loc(inode, &iloc);
@@ -373,7 +373,13 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
if (!IS_ERR(transfer_to[PRJQUOTA])) {
+
+ /* __dquot_transfer() calls back ext4_get_inode_usage() which
+ * counts xattr inode references.
+ */
+ down_read(&EXT4_I(inode)->xattr_sem);
err = __dquot_transfer(inode, transfer_to);
+ up_read(&EXT4_I(inode)->xattr_sem);
dqput(transfer_to[PRJQUOTA]);
if (err)
goto out_dirty;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b7928cddd539..581e357e8406 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -367,8 +367,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce, int rc);
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
@@ -2639,6 +2637,7 @@ int ext4_mb_init(struct super_block *sb)
spin_lock_init(&sbi->s_md_lock);
spin_lock_init(&sbi->s_bal_lock);
sbi->s_mb_free_pending = 0;
+ INIT_LIST_HEAD(&sbi->s_freed_data_list);
sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -2782,7 +2781,8 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ struct bio **biop)
{
ext4_fsblk_t discard_block;
@@ -2791,18 +2791,18 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ if (biop) {
+ return __blkdev_issue_discard(sb->s_bdev,
+ (sector_t)discard_block << (sb->s_blocksize_bits - 9),
+ (sector_t)count << (sb->s_blocksize_bits - 9),
+ GFP_NOFS, 0, biop);
+ } else
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
-/*
- * This function is called by the jbd2 layer once the commit has finished,
- * so we know we can free the blocks that were released with that commit.
- */
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce,
- int rc)
+static void ext4_free_data_in_buddy(struct super_block *sb,
+ struct ext4_free_data *entry)
{
- struct ext4_free_data *entry = (struct ext4_free_data *)jce;
struct ext4_buddy e4b;
struct ext4_group_info *db;
int err, count = 0, count2 = 0;
@@ -2810,18 +2810,6 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->efd_count, entry->efd_group, entry);
- if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count);
- if (err && err != -EOPNOTSUPP)
- ext4_msg(sb, KERN_WARNING, "discard request in"
- " group:%d block:%d count:%d failed"
- " with %d", entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count, err);
- }
-
err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
/* we expect to find existing buddy because it's pinned */
BUG_ON(err != 0);
@@ -2862,6 +2850,56 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_free_data *entry, *tmp;
+ struct bio *discard_bio = NULL;
+ struct list_head freed_data_list;
+ struct list_head *cut_pos = NULL;
+ int err;
+
+ INIT_LIST_HEAD(&freed_data_list);
+
+ spin_lock(&sbi->s_md_lock);
+ list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
+ if (entry->efd_tid != commit_tid)
+ break;
+ cut_pos = &entry->efd_list;
+ }
+ if (cut_pos)
+ list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
+ cut_pos);
+ spin_unlock(&sbi->s_md_lock);
+
+ if (test_opt(sb, DISCARD)) {
+ list_for_each_entry(entry, &freed_data_list, efd_list) {
+ err = ext4_issue_discard(sb, entry->efd_group,
+ entry->efd_start_cluster,
+ entry->efd_count,
+ &discard_bio);
+ if (err && err != -EOPNOTSUPP) {
+ ext4_msg(sb, KERN_WARNING, "discard request in"
+ " group:%d block:%d count:%d failed"
+ " with %d", entry->efd_group,
+ entry->efd_start_cluster,
+ entry->efd_count, err);
+ } else if (err == -EOPNOTSUPP)
+ break;
+ }
+
+ if (discard_bio)
+ submit_bio_wait(discard_bio);
+ }
+
+ list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
+ ext4_free_data_in_buddy(sb, entry);
+}
+
int __init ext4_init_mballoc(void)
{
ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -3529,7 +3567,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_set_bits(bitmap, start, len);
preallocated += len;
}
- mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
+ mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
}
static void ext4_mb_pa_callback(struct rcu_head *head)
@@ -4464,7 +4502,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
trace_ext4_request_blocks(ar);
/* Allow to use superuser reservation for quota file */
- if (IS_NOQUOTA(ar->inode))
+ if (ext4_is_quota_file(ar->inode))
ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
@@ -4583,14 +4621,28 @@ out:
* are contiguous, AND the extents were freed by the same transaction,
* AND the blocks are associated with the same group.
*/
-static int can_merge(struct ext4_free_data *entry1,
- struct ext4_free_data *entry2)
+static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
+ struct ext4_free_data *entry,
+ struct ext4_free_data *new_entry,
+ struct rb_root *entry_rb_root)
{
- if ((entry1->efd_tid == entry2->efd_tid) &&
- (entry1->efd_group == entry2->efd_group) &&
- ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
- return 1;
- return 0;
+ if ((entry->efd_tid != new_entry->efd_tid) ||
+ (entry->efd_group != new_entry->efd_group))
+ return;
+ if (entry->efd_start_cluster + entry->efd_count ==
+ new_entry->efd_start_cluster) {
+ new_entry->efd_start_cluster = entry->efd_start_cluster;
+ new_entry->efd_count += entry->efd_count;
+ } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
+ entry->efd_start_cluster) {
+ new_entry->efd_count += entry->efd_count;
+ } else
+ return;
+ spin_lock(&sbi->s_md_lock);
+ list_del(&entry->efd_list);
+ spin_unlock(&sbi->s_md_lock);
+ rb_erase(&entry->efd_node, entry_rb_root);
+ kmem_cache_free(ext4_free_data_cachep, entry);
}
static noinline_for_stack int
@@ -4646,29 +4698,19 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
node = rb_prev(new_node);
if (node) {
entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(entry, new_entry) &&
- ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
- new_entry->efd_start_cluster = entry->efd_start_cluster;
- new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
- kmem_cache_free(ext4_free_data_cachep, entry);
- }
+ ext4_try_merge_freed_extent(sbi, entry, new_entry,
+ &(db->bb_free_root));
}
node = rb_next(new_node);
if (node) {
entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(new_entry, entry) &&
- ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
- new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
- kmem_cache_free(ext4_free_data_cachep, entry);
- }
+ ext4_try_merge_freed_extent(sbi, entry, new_entry,
+ &(db->bb_free_root));
}
- /* Add the extent to transaction's private list */
- new_entry->efd_jce.jce_func = ext4_free_data_callback;
+
spin_lock(&sbi->s_md_lock);
- _ext4_journal_callback_add(handle, &new_entry->efd_jce);
+ list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
sbi->s_mb_free_pending += clusters;
spin_unlock(&sbi->s_md_lock);
return 0;
@@ -4871,7 +4913,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count);
+ err = ext4_issue_discard(sb, block_group, bit, count,
+ NULL);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -5094,7 +5137,7 @@ __acquires(bitlock)
*/
mb_mark_used(e4b, &ex);
ext4_unlock_group(sb, group);
- ret = ext4_issue_discard(sb, group, start, count);
+ ret = ext4_issue_discard(sb, group, start, count, NULL);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
return ret;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 2bed62084a8c..009300ee1561 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -78,10 +78,8 @@ do { \
struct ext4_free_data {
- /* MUST be the first member */
- struct ext4_journal_cb_entry efd_jce;
-
- /* ext4_free_data private data starts from here */
+ /* this links the free block information from sb_info */
+ struct list_head efd_list;
/* this links the free block information from group_info */
struct rb_node efd_node;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 364ea4d4a943..cf5181b62df1 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
owner[0] = i_uid_read(inode);
owner[1] = i_gid_read(inode);
tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root),
- S_IFREG, NULL, goal, owner);
+ S_IFREG, NULL, goal, owner, 0);
if (IS_ERR(tmp_inode)) {
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c992ef2c2f94..9bb36909ec92 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -484,7 +484,7 @@ mext_check_arguments(struct inode *orig_inode,
return -EBUSY;
}
- if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) {
+ if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
"not be quota files [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 404256caf9cf..13f0cadb1238 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -513,7 +513,7 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{
- return le32_to_cpu(entry->block) & 0x00ffffff;
+ return le32_to_cpu(entry->block) & 0x0fffffff;
}
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
@@ -739,6 +739,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
u32 hash;
+ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
frame->bh = ext4_read_dirblock(dir, 0, INDEX);
if (IS_ERR(frame->bh))
return (struct dx_frame *) frame->bh;
@@ -768,9 +769,15 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
}
indirect = root->info.indirect_levels;
- if (indirect > 1) {
- ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
- root->info.indirect_levels);
+ if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
+ ext4_warning(dir->i_sb,
+ "Directory (ino: %lu) htree depth %#06x exceed"
+ "supported value", dir->i_ino,
+ ext4_dir_htree_level(dir->i_sb));
+ if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
+ ext4_warning(dir->i_sb, "Enable large directory "
+ "feature to access it");
+ }
goto fail;
}
@@ -859,12 +866,19 @@ fail:
static void dx_release(struct dx_frame *frames)
{
+ struct dx_root_info *info;
+ int i;
+
if (frames[0].bh == NULL)
return;
- if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
- brelse(frames[1].bh);
- brelse(frames[0].bh);
+ info = &((struct dx_root *)frames[0].bh->b_data)->info;
+ for (i = 0; i <= info->indirect_levels; i++) {
+ if (frames[i].bh == NULL)
+ break;
+ brelse(frames[i].bh);
+ frames[i].bh = NULL;
+ }
}
/*
@@ -1050,7 +1064,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
{
struct dx_hash_info hinfo;
struct ext4_dir_entry_2 *de;
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct inode *dir;
ext4_lblk_t block;
int count = 0;
@@ -1428,11 +1442,11 @@ restart:
goto next;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- /* read error, skip block & hope for the best */
EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
(unsigned long) block);
brelse(bh);
- goto next;
+ ret = ERR_PTR(-EIO);
+ goto cleanup_and_exit;
}
if (!buffer_verified(bh) &&
!is_dx_internal_node(dir, block,
@@ -1442,7 +1456,8 @@ restart:
EXT4_ERROR_INODE(dir, "checksumming directory "
"block %lu", (unsigned long)block);
brelse(bh);
- goto next;
+ ret = ERR_PTR(-EFSBADCRC);
+ goto cleanup_and_exit;
}
set_buffer_verified(bh);
i = search_dirblock(bh, dir, &fname,
@@ -1485,7 +1500,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_dir_entry_2 **res_dir)
{
struct super_block * sb = dir->i_sb;
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
@@ -1889,7 +1904,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
*/
dir->i_mtime = dir->i_ctime = current_time(dir);
ext4_update_dx_flag(dir);
- dir->i_version++;
+ inode_inc_iversion(dir);
ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_dirent_node(handle, dir, bh);
@@ -1908,7 +1923,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
{
struct buffer_head *bh2;
struct dx_root *root;
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries;
struct ext4_dir_entry_2 *de, *de2;
struct ext4_dir_entry_tail *t;
@@ -2127,13 +2142,16 @@ out:
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode)
{
- struct dx_frame frames[2], *frame;
+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
+ int restart;
int err;
+again:
+ restart = 0;
frame = dx_probe(fname, dir, NULL, frames);
if (IS_ERR(frame))
return PTR_ERR(frame);
@@ -2155,24 +2173,44 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
if (err != -ENOSPC)
goto cleanup;
+ err = 0;
/* Block full, should compress but for now just split */
dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
dx_get_count(entries), dx_get_limit(entries)));
/* Need to split index? */
if (dx_get_count(entries) == dx_get_limit(entries)) {
ext4_lblk_t newblock;
- unsigned icount = dx_get_count(entries);
- int levels = frame - frames;
+ int levels = frame - frames + 1;
+ unsigned int icount;
+ int add_level = 1;
struct dx_entry *entries2;
struct dx_node *node2;
struct buffer_head *bh2;
- if (levels && (dx_get_count(frames->entries) ==
- dx_get_limit(frames->entries))) {
- ext4_warning_inode(dir, "Directory index full!");
+ while (frame > frames) {
+ if (dx_get_count((frame - 1)->entries) <
+ dx_get_limit((frame - 1)->entries)) {
+ add_level = 0;
+ break;
+ }
+ frame--; /* split higher index block */
+ at = frame->at;
+ entries = frame->entries;
+ restart = 1;
+ }
+ if (add_level && levels == ext4_dir_htree_level(sb)) {
+ ext4_warning(sb, "Directory (ino: %lu) index full, "
+ "reach max htree level :%d",
+ dir->i_ino, levels);
+ if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
+ ext4_warning(sb, "Large directory feature is "
+ "not enabled on this "
+ "filesystem");
+ }
err = -ENOSPC;
goto cleanup;
}
+ icount = dx_get_count(entries);
bh2 = ext4_append(handle, dir, &newblock);
if (IS_ERR(bh2)) {
err = PTR_ERR(bh2);
@@ -2187,7 +2225,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
err = ext4_journal_get_write_access(handle, frame->bh);
if (err)
goto journal_error;
- if (levels) {
+ if (!add_level) {
unsigned icount1 = icount/2, icount2 = icount - icount1;
unsigned hash2 = dx_get_hash(entries + icount1);
dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
@@ -2195,7 +2233,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
err = ext4_journal_get_write_access(handle,
- frames[0].bh);
+ (frame - 1)->bh);
if (err)
goto journal_error;
@@ -2211,17 +2249,25 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
frame->entries = entries = entries2;
swap(frame->bh, bh2);
}
- dx_insert_block(frames + 0, hash2, newblock);
- dxtrace(dx_show_index("node", frames[1].entries));
+ dx_insert_block((frame - 1), hash2, newblock);
+ dxtrace(dx_show_index("node", frame->entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
err = ext4_handle_dirty_dx_node(handle, dir, bh2);
if (err)
goto journal_error;
brelse (bh2);
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ (frame - 1)->bh);
+ if (err)
+ goto journal_error;
+ if (restart) {
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ frame->bh);
+ goto journal_error;
+ }
} else {
- dxtrace(printk(KERN_DEBUG
- "Creating second level index...\n"));
+ struct dx_root *dxroot;
memcpy((char *) entries2, (char *) entries,
icount * sizeof(struct dx_entry));
dx_set_limit(entries2, dx_node_limit(dir));
@@ -2229,22 +2275,18 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
/* Set up root */
dx_set_count(entries, 1);
dx_set_block(entries + 0, newblock);
- ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-
- /* Add new access path frame */
- frame = frames + 1;
- frame->at = at = at - entries + entries2;
- frame->entries = entries = entries2;
- frame->bh = bh2;
- err = ext4_journal_get_write_access(handle,
- frame->bh);
+ dxroot = (struct dx_root *)frames[0].bh->b_data;
+ dxroot->info.indirect_levels += 1;
+ dxtrace(printk(KERN_DEBUG
+ "Creating %d level index...\n",
+ info->indirect_levels));
+ err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (err)
goto journal_error;
- }
- err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
- if (err) {
- ext4_std_error(inode->i_sb, err);
- goto cleanup;
+ err = ext4_handle_dirty_dx_node(handle, dir, bh2);
+ brelse(bh2);
+ restart = 1;
+ goto journal_error;
}
}
de = do_split(handle, dir, &bh, frame, &fname->hinfo);
@@ -2256,10 +2298,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
goto cleanup;
journal_error:
- ext4_std_error(dir->i_sb, err);
+ ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
cleanup:
brelse(bh);
dx_release(frames);
+ /* @restart is true means htree-path has been changed, we need to
+ * repeat dx_probe() to find out valid htree-path
+ */
+ if (restart && err == 0)
+ goto again;
return err;
}
@@ -2296,7 +2343,7 @@ int ext4_generic_delete_entry(handle_t *handle,
blocksize);
else
de->inode = 0;
- dir->i_version++;
+ inode_inc_iversion(dir);
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len, blocksize);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 18f68f09d393..0886fe82e9c4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -373,6 +373,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
struct ext4_journal_cb_entry *jce;
BUG_ON(txn->t_state == T_FINISHED);
+
+ ext4_process_freed_data(sb, txn->t_tid);
+
spin_lock(&sbi->s_md_lock);
while (!list_empty(&txn->t_private_list)) {
jce = list_entry(txn->t_private_list.next,
@@ -927,9 +930,13 @@ static void ext4_put_super(struct super_block *sb)
invalidate_bdev(sbi->journal_bdev);
ext4_blkdev_remove(sbi);
}
- if (sbi->s_mb_cache) {
- ext4_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_inode_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+ }
+ if (sbi->s_ea_block_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
@@ -1143,7 +1150,16 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
handle_t *handle = fs_data;
- int res, res2, retries = 0;
+ int res, res2, credits, retries = 0;
+
+ /*
+ * Encrypting the root directory is not allowed because e2fsck expects
+ * lost+found to exist and be unencrypted, and encrypting the root
+ * directory would imply encrypting the lost+found directory as well as
+ * the filename "lost+found" itself.
+ */
+ if (inode->i_ino == EXT4_ROOT_INO)
+ return -EPERM;
res = ext4_convert_inline_data(inode);
if (res)
@@ -1178,8 +1194,12 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (res)
return res;
retry:
- handle = ext4_journal_start(inode, EXT4_HT_MISC,
- ext4_jbd2_credits_xattr(inode));
+ res = ext4_xattr_set_credits(inode, len, false /* is_create */,
+ &credits);
+ if (res)
+ return res;
+
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1256,16 +1276,17 @@ static struct dquot **ext4_get_dquots(struct inode *inode)
}
static const struct dquot_operations ext4_quota_operations = {
- .get_reserved_space = ext4_get_reserved_space,
- .write_dquot = ext4_write_dquot,
- .acquire_dquot = ext4_acquire_dquot,
- .release_dquot = ext4_release_dquot,
- .mark_dirty = ext4_mark_dquot_dirty,
- .write_info = ext4_write_info,
- .alloc_dquot = dquot_alloc,
- .destroy_dquot = dquot_destroy,
- .get_projid = ext4_get_projid,
- .get_next_id = ext4_get_next_id,
+ .get_reserved_space = ext4_get_reserved_space,
+ .write_dquot = ext4_write_dquot,
+ .acquire_dquot = ext4_acquire_dquot,
+ .release_dquot = ext4_release_dquot,
+ .mark_dirty = ext4_mark_dquot_dirty,
+ .write_info = ext4_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
+ .get_inode_usage = ext4_get_inode_usage,
+ .get_next_id = ext4_get_next_id,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1328,7 +1349,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb, Opt_nojournal_checksum,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
};
static const match_table_t tokens = {
@@ -1411,6 +1432,8 @@ static const match_table_t tokens = {
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_nombcache, "nombcache"},
+ {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1618,6 +1641,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
@@ -3445,7 +3469,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
/* Load the checksum driver */
- if (ext4_has_feature_metadata_csum(sb)) {
+ if (ext4_has_feature_metadata_csum(sb) ||
+ ext4_has_feature_ea_inode(sb)) {
sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
@@ -3467,7 +3492,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Precompute checksum seed for all metadata */
if (ext4_has_feature_csum_seed(sb))
sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
- else if (ext4_has_metadata_csum(sb))
+ else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
sizeof(es->s_uuid));
@@ -3597,6 +3622,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"The Hurd can't support 64-bit file systems");
goto failed_mount;
}
+
+ /*
+ * ea_inode feature uses l_i_version field which is not
+ * available in HURD_COMPAT mode.
+ */
+ if (ext4_has_feature_ea_inode(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "ea_inode feature is not supported for Hurd");
+ goto failed_mount;
+ }
}
if (IS_EXT2_SB(sb)) {
@@ -4061,10 +4096,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- sbi->s_mb_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
- goto failed_mount_wq;
+ if (!test_opt(sb, NO_MBCACHE)) {
+ sbi->s_ea_block_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_block_cache");
+ goto failed_mount_wq;
+ }
+
+ if (ext4_has_feature_ea_inode(sb)) {
+ sbi->s_ea_inode_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_inode_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_inode_cache");
+ goto failed_mount_wq;
+ }
+ }
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4296,9 +4343,13 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
- if (sbi->s_mb_cache) {
- ext4_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_inode_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+ }
+ if (sbi->s_ea_block_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
@@ -4957,6 +5008,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
+ ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index d74dc5f81a04..48c7a7d55ed3 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
- if (!ret || val >= clusters)
+ if (ret || val >= clusters)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5d3c2536641c..cff4f41ced61 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -72,12 +72,14 @@
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
-static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
-static struct buffer_head *ext4_xattr_cache_find(struct inode *,
- struct ext4_xattr_header *,
- struct mb_cache_entry **);
-static void ext4_xattr_rehash(struct ext4_xattr_header *,
- struct ext4_xattr_entry *);
+static void ext4_xattr_block_cache_insert(struct mb_cache *,
+ struct buffer_head *);
+static struct buffer_head *
+ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
+ struct mb_cache_entry **);
+static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
+ size_t value_count);
+static void ext4_xattr_rehash(struct ext4_xattr_header *);
static const struct xattr_handler * const ext4_xattr_handler_map[] = {
[EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
@@ -104,8 +106,22 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
NULL
};
-#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \
- inode->i_sb->s_fs_info)->s_mb_cache)
+#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
+ inode->i_sb->s_fs_info)->s_ea_block_cache)
+
+#define EA_INODE_CACHE(inode) (((struct ext4_sb_info *) \
+ inode->i_sb->s_fs_info)->s_ea_inode_cache)
+
+static int
+ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
+ struct inode *inode);
+
+#ifdef CONFIG_LOCKDEP
+void ext4_xattr_inode_set_class(struct inode *ea_inode)
+{
+ lockdep_set_subclass(&ea_inode->i_rwsem, 1);
+}
+#endif
static __le32 ext4_xattr_block_csum(struct inode *inode,
sector_t block_nr,
@@ -177,9 +193,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
/* Check the values */
while (!IS_LAST_ENTRY(entry)) {
- if (entry->e_value_block != 0)
- return -EFSCORRUPTED;
- if (entry->e_value_size != 0) {
+ if (entry->e_value_size != 0 &&
+ entry->e_value_inum == 0) {
u16 offs = le16_to_cpu(entry->e_value_offs);
u32 size = le32_to_cpu(entry->e_value_size);
void *value;
@@ -269,6 +284,185 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
return cmp ? -ENODATA : 0;
}
+static u32
+ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
+{
+ return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
+}
+
+static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
+{
+ return ((u64)ea_inode->i_ctime.tv_sec << 32) |
+ ((u32)ea_inode->i_version);
+}
+
+static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
+{
+ ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
+ ea_inode->i_version = (u32)ref_count;
+}
+
+static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode)
+{
+ return (u32)ea_inode->i_atime.tv_sec;
+}
+
+static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
+{
+ ea_inode->i_atime.tv_sec = hash;
+}
+
+/*
+ * Read the EA value from an inode.
+ */
+static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
+{
+ unsigned long block = 0;
+ struct buffer_head *bh;
+ int blocksize = ea_inode->i_sb->s_blocksize;
+ size_t csize, copied = 0;
+ void *copy_pos = buf;
+
+ while (copied < size) {
+ csize = (size - copied) > blocksize ? blocksize : size - copied;
+ bh = ext4_bread(NULL, ea_inode, block, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ if (!bh)
+ return -EFSCORRUPTED;
+
+ memcpy(copy_pos, bh->b_data, csize);
+ brelse(bh);
+
+ copy_pos += csize;
+ block += 1;
+ copied += csize;
+ }
+ return 0;
+}
+
+static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ struct inode **ea_inode)
+{
+ struct inode *inode;
+ int err;
+
+ inode = ext4_iget(parent->i_sb, ea_ino);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ext4_error(parent->i_sb,
+ "error while reading EA inode %lu err=%d", ea_ino,
+ err);
+ return err;
+ }
+
+ if (is_bad_inode(inode)) {
+ ext4_error(parent->i_sb,
+ "error while reading EA inode %lu is_bad_inode",
+ ea_ino);
+ err = -EIO;
+ goto error;
+ }
+
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ ext4_error(parent->i_sb,
+ "EA inode %lu does not have EXT4_EA_INODE_FL flag",
+ ea_ino);
+ err = -EINVAL;
+ goto error;
+ }
+
+ *ea_inode = inode;
+ return 0;
+error:
+ iput(inode);
+ return err;
+}
+
+static int
+ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
+ struct ext4_xattr_entry *entry, void *buffer,
+ size_t size)
+{
+ u32 hash;
+
+ /* Verify stored hash matches calculated hash. */
+ hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size);
+ if (hash != ext4_xattr_inode_get_hash(ea_inode))
+ return -EFSCORRUPTED;
+
+ if (entry) {
+ __le32 e_hash, tmp_data;
+
+ /* Verify entry hash. */
+ tmp_data = cpu_to_le32(hash);
+ e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len,
+ &tmp_data, 1);
+ if (e_hash != entry->e_hash)
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
+
+#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
+
+/*
+ * Read xattr value from the EA inode.
+ */
+static int
+ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
+ void *buffer, size_t size)
+{
+ struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
+ struct inode *ea_inode;
+ int err;
+
+ err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
+ &ea_inode);
+ if (err) {
+ ea_inode = NULL;
+ goto out;
+ }
+
+ if (i_size_read(ea_inode) != size) {
+ ext4_warning_inode(ea_inode,
+ "ea_inode file size=%llu entry size=%zu",
+ i_size_read(ea_inode), size);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
+ err = ext4_xattr_inode_read(ea_inode, buffer, size);
+ if (err)
+ goto out;
+
+ err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
+ /*
+ * Compatibility check for old Lustre ea_inode implementation. Old
+ * version does not have hash validation, but it has a backpointer
+ * from ea_inode to the parent inode.
+ */
+ if (err == -EFSCORRUPTED) {
+ if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
+ ea_inode->i_generation != inode->i_generation) {
+ ext4_warning_inode(ea_inode,
+ "EA inode hash validation failed");
+ goto out;
+ }
+ /* Do not add ea_inode to the cache. */
+ ea_inode_cache = NULL;
+ } else if (err)
+ goto out;
+
+ if (ea_inode_cache)
+ mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
+ ext4_xattr_inode_get_hash(ea_inode),
+ ea_inode->i_ino, true /* reusable */);
+out:
+ iput(ea_inode);
+ return err;
+}
+
static int
ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
void *buffer, size_t buffer_size)
@@ -277,7 +471,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
struct ext4_xattr_entry *entry;
size_t size;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -298,7 +492,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(ea_block_cache, bh);
entry = BFIRST(bh);
error = ext4_xattr_find_entry(&entry, name_index, name, 1);
if (error)
@@ -308,8 +502,15 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
- size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode, entry, buffer,
+ size);
+ if (error)
+ goto cleanup;
+ } else {
+ memcpy(buffer, bh->b_data +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
}
error = size;
@@ -350,8 +551,15 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- memcpy(buffer, (void *)IFIRST(header) +
- le16_to_cpu(entry->e_value_offs), size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode, entry, buffer,
+ size);
+ if (error)
+ goto cleanup;
+ } else {
+ memcpy(buffer, (void *)IFIRST(header) +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
}
error = size;
@@ -428,7 +636,6 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -450,7 +657,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
cleanup:
@@ -539,15 +746,445 @@ static void ext4_xattr_update_super_block(handle_t *handle,
}
}
+int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
+{
+ struct ext4_iloc iloc = { .bh = NULL };
+ struct buffer_head *bh = NULL;
+ struct ext4_inode *raw_inode;
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_xattr_entry *entry;
+ qsize_t ea_inode_refs = 0;
+ void *end;
+ int ret;
+
+ lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem);
+
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
+ ret = ext4_get_inode_loc(inode, &iloc);
+ if (ret)
+ goto out;
+ raw_inode = ext4_raw_inode(&iloc);
+ header = IHDR(inode, raw_inode);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+ ret = xattr_check_inode(inode, header, end);
+ if (ret)
+ goto out;
+
+ for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ ea_inode_refs++;
+ }
+
+ if (EXT4_I(inode)->i_file_acl) {
+ bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+ if (!bh) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (ext4_xattr_check_block(inode, bh)) {
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+
+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ ea_inode_refs++;
+ }
+ *usage = ea_inode_refs + 1;
+ ret = 0;
+out:
+ brelse(iloc.bh);
+ brelse(bh);
+ return ret;
+}
+
+static inline size_t round_up_cluster(struct inode *inode, size_t length)
+{
+ struct super_block *sb = inode->i_sb;
+ size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits +
+ inode->i_blkbits);
+ size_t mask = ~(cluster_size - 1);
+
+ return (length + cluster_size - 1) & mask;
+}
+
+static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
+{
+ int err;
+
+ err = dquot_alloc_inode(inode);
+ if (err)
+ return err;
+ err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len));
+ if (err)
+ dquot_free_inode(inode);
+ return err;
+}
+
+static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
+{
+ dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
+ dquot_free_inode(inode);
+}
+
+int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ struct buffer_head *block_bh, size_t value_len,
+ bool is_create)
+{
+ int credits;
+ int blocks;
+
+ /*
+ * 1) Owner inode update
+ * 2) Ref count update on old xattr block
+ * 3) new xattr block
+ * 4) block bitmap update for new xattr block
+ * 5) group descriptor for new xattr block
+ * 6) block bitmap update for old xattr block
+ * 7) group descriptor for old block
+ *
+ * 6 & 7 can happen if we have two racing threads T_a and T_b
+ * which are each trying to set an xattr on inodes I_a and I_b
+ * which were both initially sharing an xattr block.
+ */
+ credits = 7;
+
+ /* Quota updates. */
+ credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb);
+
+ /*
+ * In case of inline data, we may push out the data to a block,
+ * so we need to reserve credits for this eventuality
+ */
+ if (inode && ext4_has_inline_data(inode))
+ credits += ext4_writepage_trans_blocks(inode) + 1;
+
+ /* We are done if ea_inode feature is not enabled. */
+ if (!ext4_has_feature_ea_inode(sb))
+ return credits;
+
+ /* New ea_inode, inode map, block bitmap, group descriptor. */
+ credits += 4;
+
+ /* Data blocks. */
+ blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+
+ /* Indirection block or one level of extent tree. */
+ blocks += 1;
+
+ /* Block bitmap and group descriptor updates for each block. */
+ credits += blocks * 2;
+
+ /* Blocks themselves. */
+ credits += blocks;
+
+ if (!is_create) {
+ /* Dereference ea_inode holding old xattr value.
+ * Old ea_inode, inode map, block bitmap, group descriptor.
+ */
+ credits += 4;
+
+ /* Data blocks for old ea_inode. */
+ blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
+
+ /* Indirection block or one level of extent tree for old
+ * ea_inode.
+ */
+ blocks += 1;
+
+ /* Block bitmap and group descriptor updates for each block. */
+ credits += blocks * 2;
+ }
+
+ /* We may need to clone the existing xattr block in which case we need
+ * to increment ref counts for existing ea_inodes referenced by it.
+ */
+ if (block_bh) {
+ struct ext4_xattr_entry *entry = BFIRST(block_bh);
+
+ for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ /* Ref count update on ea_inode. */
+ credits += 1;
+ }
+ return credits;
+}
+
+static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
+ int credits, struct buffer_head *bh,
+ bool dirty, bool block_csum)
+{
+ int error;
+
+ if (!ext4_handle_valid(handle))
+ return 0;
+
+ if (handle->h_buffer_credits >= credits)
+ return 0;
+
+ error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
+ if (!error)
+ return 0;
+ if (error < 0) {
+ ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
+ return error;
+ }
+
+ if (bh && dirty) {
+ if (block_csum)
+ ext4_xattr_block_csum_set(inode, bh);
+ error = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (error) {
+ ext4_warning(inode->i_sb, "Handle metadata (error %d)",
+ error);
+ return error;
+ }
+ }
+
+ error = ext4_journal_restart(handle, credits);
+ if (error) {
+ ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
+ return error;
+ }
+
+ if (bh) {
+ error = ext4_journal_get_write_access(handle, bh);
+ if (error) {
+ ext4_warning(inode->i_sb,
+ "Get write access failed (error %d)",
+ error);
+ return error;
+ }
+ }
+ return 0;
+}
+
+static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+ int ref_change)
+{
+ struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+ struct ext4_iloc iloc;
+ s64 ref_count;
+ u32 hash;
+ int ret;
+
+ inode_lock(ea_inode);
+
+ ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
+ if (ret) {
+ iloc.bh = NULL;
+ goto out;
+ }
+
+ ref_count = ext4_xattr_inode_get_ref(ea_inode);
+ ref_count += ref_change;
+ ext4_xattr_inode_set_ref(ea_inode, ref_count);
+
+ if (ref_change > 0) {
+ WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
+ ea_inode->i_ino, ref_count);
+
+ if (ref_count == 1) {
+ WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
+ ea_inode->i_ino, ea_inode->i_nlink);
+
+ set_nlink(ea_inode, 1);
+ ext4_orphan_del(handle, ea_inode);
+
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_create(ea_inode_cache,
+ GFP_NOFS, hash,
+ ea_inode->i_ino,
+ true /* reusable */);
+ }
+ }
+ } else {
+ WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+ ea_inode->i_ino, ref_count);
+
+ if (ref_count == 0) {
+ WARN_ONCE(ea_inode->i_nlink != 1,
+ "EA inode %lu i_nlink=%u",
+ ea_inode->i_ino, ea_inode->i_nlink);
+
+ clear_nlink(ea_inode);
+ ext4_orphan_add(handle, ea_inode);
+
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_delete(ea_inode_cache, hash,
+ ea_inode->i_ino);
+ }
+ }
+ }
+
+ ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
+ iloc.bh = NULL;
+ if (ret)
+ ext4_warning_inode(ea_inode,
+ "ext4_mark_iloc_dirty() failed ret=%d", ret);
+out:
+ brelse(iloc.bh);
+ inode_unlock(ea_inode);
+ return ret;
+}
+
+static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode)
+{
+ return ext4_xattr_inode_update_ref(handle, ea_inode, 1);
+}
+
+static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode)
+{
+ return ext4_xattr_inode_update_ref(handle, ea_inode, -1);
+}
+
+static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
+ struct ext4_xattr_entry *first)
+{
+ struct inode *ea_inode;
+ struct ext4_xattr_entry *entry;
+ struct ext4_xattr_entry *failed_entry;
+ unsigned int ea_ino;
+ int err, saved_err;
+
+ for (entry = first; !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err)
+ goto cleanup;
+ err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode, "inc ref error %d", err);
+ iput(ea_inode);
+ goto cleanup;
+ }
+ iput(ea_inode);
+ }
+ return 0;
+
+cleanup:
+ saved_err = err;
+ failed_entry = entry;
+
+ for (entry = first; entry != failed_entry;
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err) {
+ ext4_warning(parent->i_sb,
+ "cleanup ea_ino %u iget error %d", ea_ino,
+ err);
+ continue;
+ }
+ err = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (err)
+ ext4_warning_inode(ea_inode, "cleanup dec ref error %d",
+ err);
+ iput(ea_inode);
+ }
+ return saved_err;
+}
+
+static void
+ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
+ struct buffer_head *bh,
+ struct ext4_xattr_entry *first, bool block_csum,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits, bool skip_quota)
+{
+ struct inode *ea_inode;
+ struct ext4_xattr_entry *entry;
+ bool dirty = false;
+ unsigned int ea_ino;
+ int err;
+ int credits;
+
+ /* One credit for dec ref on ea_inode, one for orphan list addition, */
+ credits = 2 + extra_credits;
+
+ for (entry = first; !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err)
+ continue;
+
+ err = ext4_expand_inode_array(ea_inode_array, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode,
+ "Expand inode array err=%d", err);
+ iput(ea_inode);
+ continue;
+ }
+
+ err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
+ dirty, block_csum);
+ if (err) {
+ ext4_warning_inode(ea_inode, "Ensure credits err=%d",
+ err);
+ continue;
+ }
+
+ err = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d",
+ err);
+ continue;
+ }
+
+ if (!skip_quota)
+ ext4_xattr_inode_free_quota(parent,
+ le32_to_cpu(entry->e_value_size));
+
+ /*
+ * Forget about ea_inode within the same transaction that
+ * decrements the ref count. This avoids duplicate decrements in
+ * case the rest of the work spills over to subsequent
+ * transactions.
+ */
+ entry->e_value_inum = 0;
+ entry->e_value_size = 0;
+
+ dirty = true;
+ }
+
+ if (dirty) {
+ /*
+ * Note that we are deliberately skipping csum calculation for
+ * the final update because we do not expect any journal
+ * restarts until xattr block is freed.
+ */
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ ext4_warning_inode(parent,
+ "handle dirty metadata err=%d", err);
+ }
+}
+
/*
* Release the xattr block BH: If the reference count is > 1, decrement it;
* otherwise free the block.
*/
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
- struct buffer_head *bh)
+ struct buffer_head *bh,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits)
{
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
u32 hash, ref;
int error = 0;
@@ -565,9 +1202,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
+ if (ea_block_cache)
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
+
+ if (ext4_has_feature_ea_inode(inode->i_sb))
+ ext4_xattr_inode_dec_ref_all(handle, inode, bh,
+ BFIRST(bh),
+ true /* block_csum */,
+ ea_inode_array,
+ extra_credits,
+ true /* skip_quota */);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
@@ -577,11 +1224,13 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
- ce = mb_cache_entry_get(ext4_mb_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
- mb_cache_entry_put(ext4_mb_cache, ce);
+ if (ea_block_cache) {
+ ce = mb_cache_entry_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ea_block_cache, ce);
+ }
}
}
@@ -620,7 +1269,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
@@ -631,113 +1280,454 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
return (*min_offs - ((void *)last - base) - sizeof(__u32));
}
-static int
-ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
+/*
+ * Write the value of the EA in an inode.
+ */
+static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
+ const void *buf, int bufsize)
+{
+ struct buffer_head *bh = NULL;
+ unsigned long block = 0;
+ int blocksize = ea_inode->i_sb->s_blocksize;
+ int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
+ int csize, wsize = 0;
+ int ret = 0;
+ int retries = 0;
+
+retry:
+ while (ret >= 0 && ret < max_blocks) {
+ struct ext4_map_blocks map;
+ map.m_lblk = block += ret;
+ map.m_len = max_blocks -= ret;
+
+ ret = ext4_map_blocks(handle, ea_inode, &map,
+ EXT4_GET_BLOCKS_CREATE);
+ if (ret <= 0) {
+ ext4_mark_inode_dirty(handle, ea_inode);
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
+ ret = 0;
+ goto retry;
+ }
+ break;
+ }
+ }
+
+ if (ret < 0)
+ return ret;
+
+ block = 0;
+ while (wsize < bufsize) {
+ if (bh != NULL)
+ brelse(bh);
+ csize = (bufsize - wsize) > blocksize ? blocksize :
+ bufsize - wsize;
+ bh = ext4_getblk(handle, ea_inode, block, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ ret = ext4_journal_get_write_access(handle, bh);
+ if (ret)
+ goto out;
+
+ memcpy(bh->b_data, buf, csize);
+ set_buffer_uptodate(bh);
+ ext4_handle_dirty_metadata(handle, ea_inode, bh);
+
+ buf += csize;
+ wsize += csize;
+ block += 1;
+ }
+
+ inode_lock(ea_inode);
+ i_size_write(ea_inode, wsize);
+ ext4_update_i_disksize(ea_inode, wsize);
+ inode_unlock(ea_inode);
+
+ ext4_mark_inode_dirty(handle, ea_inode);
+
+out:
+ brelse(bh);
+
+ return ret;
+}
+
+/*
+ * Create an inode to store the value of a large EA.
+ */
+static struct inode *ext4_xattr_inode_create(handle_t *handle,
+ struct inode *inode, u32 hash)
+{
+ struct inode *ea_inode = NULL;
+ uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
+ int err;
+
+ /*
+ * Let the next inode be the goal, so we try and allocate the EA inode
+ * in the same group, or nearby one.
+ */
+ ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
+ S_IFREG | 0600, NULL, inode->i_ino + 1, owner,
+ EXT4_EA_INODE_FL);
+ if (!IS_ERR(ea_inode)) {
+ ea_inode->i_op = &ext4_file_inode_operations;
+ ea_inode->i_fop = &ext4_file_operations;
+ ext4_set_aops(ea_inode);
+ ext4_xattr_inode_set_class(ea_inode);
+ unlock_new_inode(ea_inode);
+ ext4_xattr_inode_set_ref(ea_inode, 1);
+ ext4_xattr_inode_set_hash(ea_inode, hash);
+ err = ext4_mark_inode_dirty(handle, ea_inode);
+ if (!err)
+ err = ext4_inode_attach_jinode(ea_inode);
+ if (err) {
+ iput(ea_inode);
+ return ERR_PTR(err);
+ }
+
+ /*
+ * Xattr inodes are shared therefore quota charging is performed
+ * at a higher level.
+ */
+ dquot_free_inode(ea_inode);
+ dquot_drop(ea_inode);
+ inode_lock(ea_inode);
+ ea_inode->i_flags |= S_NOQUOTA;
+ inode_unlock(ea_inode);
+ }
+
+ return ea_inode;
+}
+
+static struct inode *
+ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+ size_t value_len, u32 hash)
+{
+ struct inode *ea_inode;
+ struct mb_cache_entry *ce;
+ struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
+ void *ea_data;
+
+ if (!ea_inode_cache)
+ return NULL;
+
+ ce = mb_cache_entry_find_first(ea_inode_cache, hash);
+ if (!ce)
+ return NULL;
+
+ ea_data = ext4_kvmalloc(value_len, GFP_NOFS);
+ if (!ea_data) {
+ mb_cache_entry_put(ea_inode_cache, ce);
+ return NULL;
+ }
+
+ while (ce) {
+ ea_inode = ext4_iget(inode->i_sb, ce->e_value);
+ if (!IS_ERR(ea_inode) &&
+ !is_bad_inode(ea_inode) &&
+ (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
+ i_size_read(ea_inode) == value_len &&
+ !ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
+ !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
+ value_len) &&
+ !memcmp(value, ea_data, value_len)) {
+ mb_cache_entry_touch(ea_inode_cache, ce);
+ mb_cache_entry_put(ea_inode_cache, ce);
+ kvfree(ea_data);
+ return ea_inode;
+ }
+
+ if (!IS_ERR(ea_inode))
+ iput(ea_inode);
+ ce = mb_cache_entry_find_next(ea_inode_cache, ce);
+ }
+ kvfree(ea_data);
+ return NULL;
+}
+
+/*
+ * Add value of the EA in an inode.
+ */
+static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+ const void *value, size_t value_len,
+ struct inode **ret_inode)
+{
+ struct inode *ea_inode;
+ u32 hash;
+ int err;
+
+ hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
+ ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
+ if (ea_inode) {
+ err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+ if (err) {
+ iput(ea_inode);
+ return err;
+ }
+
+ *ret_inode = ea_inode;
+ return 0;
+ }
+
+ /* Create an inode for the EA value */
+ ea_inode = ext4_xattr_inode_create(handle, inode, hash);
+ if (IS_ERR(ea_inode))
+ return PTR_ERR(ea_inode);
+
+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
+ if (err) {
+ ext4_xattr_inode_dec_ref(handle, ea_inode);
+ iput(ea_inode);
+ return err;
+ }
+
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+ ea_inode->i_ino, true /* reusable */);
+
+ *ret_inode = ea_inode;
+ return 0;
+}
+
+/*
+ * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode
+ * feature is enabled.
+ */
+#define EXT4_XATTR_BLOCK_RESERVE(inode) min(i_blocksize(inode)/8, 1024U)
+
+static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ struct ext4_xattr_search *s,
+ handle_t *handle, struct inode *inode,
+ bool is_block)
{
struct ext4_xattr_entry *last;
- size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+ struct ext4_xattr_entry *here = s->here;
+ size_t min_offs = s->end - s->base, name_len = strlen(i->name);
+ int in_inode = i->in_inode;
+ struct inode *old_ea_inode = NULL;
+ struct inode *new_ea_inode = NULL;
+ size_t old_size, new_size;
+ int ret;
+
+ /* Space used by old and new values. */
+ old_size = (!s->not_found && !here->e_value_inum) ?
+ EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0;
+ new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0;
+
+ /*
+ * Optimization for the simple case when old and new values have the
+ * same padded sizes. Not applicable if external inodes are involved.
+ */
+ if (new_size && new_size == old_size) {
+ size_t offs = le16_to_cpu(here->e_value_offs);
+ void *val = s->base + offs;
+
+ here->e_value_size = cpu_to_le32(i->value_len);
+ if (i->value == EXT4_ZERO_XATTR_VALUE) {
+ memset(val, 0, new_size);
+ } else {
+ memcpy(val, i->value, i->value_len);
+ /* Clear padding bytes. */
+ memset(val + i->value_len, 0, new_size - i->value_len);
+ }
+ return 0;
+ }
/* Compute min_offs and last. */
last = s->first;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
}
- free = min_offs - ((void *)last - s->base) - sizeof(__u32);
- if (!s->not_found) {
- if (s->here->e_value_size) {
- size_t size = le32_to_cpu(s->here->e_value_size);
- free += EXT4_XATTR_SIZE(size);
- }
- free += EXT4_XATTR_LEN(name_len);
- }
+
+ /* Check whether we have enough space. */
if (i->value) {
- if (free < EXT4_XATTR_LEN(name_len) +
- EXT4_XATTR_SIZE(i->value_len))
- return -ENOSPC;
+ size_t free;
+
+ free = min_offs - ((void *)last - s->base) - sizeof(__u32);
+ if (!s->not_found)
+ free += EXT4_XATTR_LEN(name_len) + old_size;
+
+ if (free < EXT4_XATTR_LEN(name_len) + new_size) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
+ * If storing the value in an external inode is an option,
+ * reserve space for xattr entries/names in the external
+ * attribute block so that a long value does not occupy the
+ * whole space and prevent futher entries being added.
+ */
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ new_size && is_block &&
+ (min_offs + old_size - new_size) <
+ EXT4_XATTR_BLOCK_RESERVE(inode)) {
+ ret = -ENOSPC;
+ goto out;
+ }
}
- if (i->value && s->not_found) {
- /* Insert the new name. */
- size_t size = EXT4_XATTR_LEN(name_len);
- size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
- memmove((void *)s->here + size, s->here, rest);
- memset(s->here, 0, size);
- s->here->e_name_index = i->name_index;
- s->here->e_name_len = name_len;
- memcpy(s->here->e_name, i->name, name_len);
- } else {
- if (s->here->e_value_size) {
- void *first_val = s->base + min_offs;
- size_t offs = le16_to_cpu(s->here->e_value_offs);
- void *val = s->base + offs;
- size_t size = EXT4_XATTR_SIZE(
- le32_to_cpu(s->here->e_value_size));
-
- if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
- /* The old and the new value have the same
- size. Just replace. */
- s->here->e_value_size =
- cpu_to_le32(i->value_len);
- if (i->value == EXT4_ZERO_XATTR_VALUE) {
- memset(val, 0, size);
- } else {
- /* Clear pad bytes first. */
- memset(val + size - EXT4_XATTR_PAD, 0,
- EXT4_XATTR_PAD);
- memcpy(val, i->value, i->value_len);
- }
- return 0;
- }
+ /*
+ * Getting access to old and new ea inodes is subject to failures.
+ * Finish that work before doing any modifications to the xattr data.
+ */
+ if (!s->not_found && here->e_value_inum) {
+ ret = ext4_xattr_inode_iget(inode,
+ le32_to_cpu(here->e_value_inum),
+ &old_ea_inode);
+ if (ret) {
+ old_ea_inode = NULL;
+ goto out;
+ }
+ }
+ if (i->value && in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
+ if (ret)
+ goto out;
+
+ ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
+ i->value_len,
+ &new_ea_inode);
+ if (ret) {
+ new_ea_inode = NULL;
+ ext4_xattr_inode_free_quota(inode, i->value_len);
+ goto out;
+ }
+ }
- /* Remove the old value. */
- memmove(first_val + size, first_val, val - first_val);
- memset(first_val, 0, size);
- s->here->e_value_size = 0;
- s->here->e_value_offs = 0;
- min_offs += size;
-
- /* Adjust all value offsets. */
- last = s->first;
- while (!IS_LAST_ENTRY(last)) {
- size_t o = le16_to_cpu(last->e_value_offs);
- if (last->e_value_size && o < offs)
- last->e_value_offs =
- cpu_to_le16(o + size);
- last = EXT4_XATTR_NEXT(last);
+ if (old_ea_inode) {
+ /* We are ready to release ref count on the old_ea_inode. */
+ ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
+ if (ret) {
+ /* Release newly required ref count on new_ea_inode. */
+ if (new_ea_inode) {
+ int err;
+
+ err = ext4_xattr_inode_dec_ref(handle,
+ new_ea_inode);
+ if (err)
+ ext4_warning_inode(new_ea_inode,
+ "dec ref new_ea_inode err=%d",
+ err);
+ ext4_xattr_inode_free_quota(inode,
+ i->value_len);
}
+ goto out;
}
- if (!i->value) {
- /* Remove the old name. */
- size_t size = EXT4_XATTR_LEN(name_len);
- last = ENTRY((void *)last - size);
- memmove(s->here, (void *)s->here + size,
- (void *)last - (void *)s->here + sizeof(__u32));
- memset(last, 0, size);
+
+ ext4_xattr_inode_free_quota(inode,
+ le32_to_cpu(here->e_value_size));
+ }
+
+ /* No failures allowed past this point. */
+
+ if (!s->not_found && here->e_value_offs) {
+ /* Remove the old value. */
+ void *first_val = s->base + min_offs;
+ size_t offs = le16_to_cpu(here->e_value_offs);
+ void *val = s->base + offs;
+
+ memmove(first_val + old_size, first_val, val - first_val);
+ memset(first_val, 0, old_size);
+ min_offs += old_size;
+
+ /* Adjust all value offsets. */
+ last = s->first;
+ while (!IS_LAST_ENTRY(last)) {
+ size_t o = le16_to_cpu(last->e_value_offs);
+
+ if (!last->e_value_inum &&
+ last->e_value_size && o < offs)
+ last->e_value_offs = cpu_to_le16(o + old_size);
+ last = EXT4_XATTR_NEXT(last);
}
}
+ if (!i->value) {
+ /* Remove old name. */
+ size_t size = EXT4_XATTR_LEN(name_len);
+
+ last = ENTRY((void *)last - size);
+ memmove(here, (void *)here + size,
+ (void *)last - (void *)here + sizeof(__u32));
+ memset(last, 0, size);
+ } else if (s->not_found) {
+ /* Insert new name. */
+ size_t size = EXT4_XATTR_LEN(name_len);
+ size_t rest = (void *)last - (void *)here + sizeof(__u32);
+
+ memmove((void *)here + size, here, rest);
+ memset(here, 0, size);
+ here->e_name_index = i->name_index;
+ here->e_name_len = name_len;
+ memcpy(here->e_name, i->name, name_len);
+ } else {
+ /* This is an update, reset value info. */
+ here->e_value_inum = 0;
+ here->e_value_offs = 0;
+ here->e_value_size = 0;
+ }
+
if (i->value) {
- /* Insert the new value. */
- s->here->e_value_size = cpu_to_le32(i->value_len);
- if (i->value_len) {
- size_t size = EXT4_XATTR_SIZE(i->value_len);
- void *val = s->base + min_offs - size;
- s->here->e_value_offs = cpu_to_le16(min_offs - size);
+ /* Insert new value. */
+ if (in_inode) {
+ here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino);
+ } else if (i->value_len) {
+ void *val = s->base + min_offs - new_size;
+
+ here->e_value_offs = cpu_to_le16(min_offs - new_size);
if (i->value == EXT4_ZERO_XATTR_VALUE) {
- memset(val, 0, size);
+ memset(val, 0, new_size);
} else {
- /* Clear the pad bytes first. */
- memset(val + size - EXT4_XATTR_PAD, 0,
- EXT4_XATTR_PAD);
memcpy(val, i->value, i->value_len);
+ /* Clear padding bytes. */
+ memset(val + i->value_len, 0,
+ new_size - i->value_len);
}
}
+ here->e_value_size = cpu_to_le32(i->value_len);
}
- return 0;
+
+ if (i->value) {
+ __le32 hash = 0;
+
+ /* Entry hash calculation. */
+ if (in_inode) {
+ __le32 crc32c_hash;
+
+ /*
+ * Feed crc32c hash instead of the raw value for entry
+ * hash calculation. This is to avoid walking
+ * potentially long value buffer again.
+ */
+ crc32c_hash = cpu_to_le32(
+ ext4_xattr_inode_get_hash(new_ea_inode));
+ hash = ext4_xattr_hash_entry(here->e_name,
+ here->e_name_len,
+ &crc32c_hash, 1);
+ } else if (is_block) {
+ __le32 *value = s->base + min_offs - new_size;
+
+ hash = ext4_xattr_hash_entry(here->e_name,
+ here->e_name_len, value,
+ new_size >> 2);
+ }
+ here->e_hash = hash;
+ }
+
+ if (is_block)
+ ext4_xattr_rehash((struct ext4_xattr_header *)s->base);
+
+ ret = 0;
+out:
+ iput(old_ea_inode);
+ iput(new_ea_inode);
+ return ret;
}
struct ext4_xattr_block_find {
@@ -794,15 +1784,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
{
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
- struct ext4_xattr_search *s = &bs->s;
+ struct ext4_xattr_search s_copy = bs->s;
+ struct ext4_xattr_search *s = &s_copy;
struct mb_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+ struct inode *ea_inode = NULL;
+ size_t old_ea_inode_size = 0;
#define header(x) ((struct ext4_xattr_header *)(x))
- if (i->value && i->value_len > sb->s_blocksize)
- return -ENOSPC;
if (s->base) {
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
@@ -818,17 +1809,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- mb_cache_entry_delete_block(ext4_mb_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache)
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
- error = ext4_xattr_set_entry(i, s);
- if (!error) {
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base),
- s->here);
- ext4_xattr_cache_insert(ext4_mb_cache,
- bs->bh);
- }
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+ true /* is_block */);
+ if (!error)
+ ext4_xattr_block_cache_insert(ea_block_cache,
+ bs->bh);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@@ -854,6 +1843,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
header(s->base)->h_refcount = cpu_to_le32(1);
s->here = ENTRY(s->base + offset);
s->end = s->base + bs->bh->b_size;
+
+ /*
+ * If existing entry points to an xattr inode, we need
+ * to prevent ext4_xattr_set_entry() from decrementing
+ * ref count on it because the reference belongs to the
+ * original block. In this case, make the entry look
+ * like it has an empty value.
+ */
+ if (!s->not_found && s->here->e_value_inum) {
+ /*
+ * Defer quota free call for previous inode
+ * until success is guaranteed.
+ */
+ old_ea_inode_size = le32_to_cpu(
+ s->here->e_value_size);
+ s->here->e_value_inum = 0;
+ s->here->e_value_size = 0;
+ }
}
} else {
/* Allocate a buffer where we construct the new block. */
@@ -870,17 +1877,33 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
s->end = s->base + sb->s_blocksize;
}
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base), s->here);
+
+ if (i->value && s->here->e_value_inum) {
+ unsigned int ea_ino;
+
+ /*
+ * A ref count on ea_inode has been taken as part of the call to
+ * ext4_xattr_set_entry() above. We would like to drop this
+ * extra ref but we have to wait until the xattr block is
+ * initialized and has its own ref count on the ea_inode.
+ */
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
+ error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
+ if (error) {
+ ea_inode = NULL;
+ goto cleanup;
+ }
+ }
inserted:
if (!IS_LAST_ENTRY(s->first)) {
- new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
+ new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
+ &ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == bs->bh)
@@ -925,7 +1948,7 @@ inserted:
EXT4_C2B(EXT4_SB(sb),
1));
brelse(new_bh);
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
ce = NULL;
new_bh = NULL;
goto inserted;
@@ -944,8 +1967,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_touch(ext4_mb_cache, ce);
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_touch(ea_block_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -984,6 +2007,22 @@ getblk_failed:
EXT4_FREE_BLOCKS_METADATA);
goto cleanup;
}
+ error = ext4_xattr_inode_inc_ref_all(handle, inode,
+ ENTRY(header(s->base)+1));
+ if (error)
+ goto getblk_failed;
+ if (ea_inode) {
+ /* Drop the extra ref on ea_inode. */
+ error = ext4_xattr_inode_dec_ref(handle,
+ ea_inode);
+ if (error)
+ ext4_warning_inode(ea_inode,
+ "dec ref error=%d",
+ error);
+ iput(ea_inode);
+ ea_inode = NULL;
+ }
+
lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, new_bh);
if (error) {
@@ -995,7 +2034,7 @@ getblk_failed:
ext4_xattr_block_csum_set(inode, new_bh);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
+ ext4_xattr_block_cache_insert(ea_block_cache, new_bh);
error = ext4_handle_dirty_metadata(handle, inode,
new_bh);
if (error)
@@ -1003,17 +2042,40 @@ getblk_failed:
}
}
+ if (old_ea_inode_size)
+ ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
+
/* Update the inode. */
EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
/* Drop the previous xattr block. */
- if (bs->bh && bs->bh != new_bh)
- ext4_xattr_release_block(handle, inode, bs->bh);
+ if (bs->bh && bs->bh != new_bh) {
+ struct ext4_xattr_inode_array *ea_inode_array = NULL;
+
+ ext4_xattr_release_block(handle, inode, bs->bh,
+ &ea_inode_array,
+ 0 /* extra_credits */);
+ ext4_xattr_inode_array_free(ea_inode_array);
+ }
error = 0;
cleanup:
+ if (ea_inode) {
+ int error2;
+
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
+
+ /* If there was an error, revert the quota charge. */
+ if (error)
+ ext4_xattr_inode_free_quota(inode,
+ i_size_read(ea_inode));
+ iput(ea_inode);
+ }
if (ce)
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -1070,7 +2132,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error) {
if (error == -ENOSPC &&
ext4_has_inline_data(inode)) {
@@ -1082,7 +2144,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_find(inode, i, is);
if (error)
return error;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+ false /* is_block */);
}
if (error)
return error;
@@ -1098,7 +2161,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
return 0;
}
-static int ext4_xattr_ibody_set(struct inode *inode,
+static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is)
{
@@ -1108,7 +2171,7 @@ static int ext4_xattr_ibody_set(struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error)
return error;
header = IHDR(inode, ext4_raw_inode(&is->iloc));
@@ -1127,12 +2190,31 @@ static int ext4_xattr_value_same(struct ext4_xattr_search *s,
{
void *value;
+ /* When e_value_inum is set the value is stored externally. */
+ if (s->here->e_value_inum)
+ return 0;
if (le32_to_cpu(s->here->e_value_size) != i->value_len)
return 0;
value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
return !memcmp(value, i->value, i->value_len);
}
+static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
+{
+ struct buffer_head *bh;
+ int error;
+
+ if (!EXT4_I(inode)->i_file_acl)
+ return NULL;
+ bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+ if (!bh)
+ return ERR_PTR(-EIO);
+ error = ext4_xattr_check_block(inode, bh);
+ if (error)
+ return ERR_PTR(error);
+ return bh;
+}
+
/*
* ext4_xattr_set_handle()
*
@@ -1155,7 +2237,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
.name = name,
.value = value,
.value_len = value_len,
-
+ .in_inode = 0,
};
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
@@ -1173,6 +2255,28 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
ext4_write_lock_xattr(inode, &no_expand);
+ /* Check journal credits under write lock. */
+ if (ext4_handle_valid(handle)) {
+ struct buffer_head *bh;
+ int credits;
+
+ bh = ext4_xattr_get_block(inode);
+ if (IS_ERR(bh)) {
+ error = PTR_ERR(bh);
+ goto cleanup;
+ }
+
+ credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
+ value_len,
+ flags & XATTR_CREATE);
+ brelse(bh);
+
+ if (!ext4_handle_has_enough_credits(handle, credits)) {
+ error = -ENOSPC;
+ goto cleanup;
+ }
+ }
+
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
if (error)
goto cleanup;
@@ -1202,9 +2306,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (flags & XATTR_CREATE)
goto cleanup;
}
+
if (!value) {
if (!is.s.not_found)
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
else if (!bs.s.not_found)
error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else {
@@ -1215,7 +2320,12 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
goto cleanup;
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ (EXT4_XATTR_SIZE(i.value_len) >
+ EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
+ i.in_inode = 1;
+retry_inode:
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (!error && !bs.s.not_found) {
i.value = NULL;
error = ext4_xattr_block_set(handle, inode, &i, &bs);
@@ -1226,11 +2336,20 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
goto cleanup;
}
error = ext4_xattr_block_set(handle, inode, &i, &bs);
- if (error)
- goto cleanup;
- if (!is.s.not_found) {
+ if (!error && !is.s.not_found) {
i.value = NULL;
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i,
+ &is);
+ } else if (error == -ENOSPC) {
+ /*
+ * Xattr does not fit in the block, store at
+ * external inode if possible.
+ */
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ !i.in_inode) {
+ i.in_inode = 1;
+ goto retry_inode;
+ }
}
}
}
@@ -1256,6 +2375,33 @@ cleanup:
return error;
}
+int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
+ bool is_create, int *credits)
+{
+ struct buffer_head *bh;
+ int err;
+
+ *credits = 0;
+
+ if (!EXT4_SB(inode->i_sb)->s_journal)
+ return 0;
+
+ down_read(&EXT4_I(inode)->xattr_sem);
+
+ bh = ext4_xattr_get_block(inode);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ } else {
+ *credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
+ value_len, is_create);
+ brelse(bh);
+ err = 0;
+ }
+
+ up_read(&EXT4_I(inode)->xattr_sem);
+ return err;
+}
+
/*
* ext4_xattr_set()
*
@@ -1269,13 +2415,20 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
const void *value, size_t value_len, int flags)
{
handle_t *handle;
+ struct super_block *sb = inode->i_sb;
int error, retries = 0;
- int credits = ext4_jbd2_credits_xattr(inode);
+ int credits;
error = dquot_initialize(inode);
if (error)
return error;
+
retry:
+ error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE,
+ &credits);
+ if (error)
+ return error;
+
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
@@ -1286,7 +2439,7 @@ retry:
value, value_len, flags);
error2 = ext4_journal_stop(handle);
if (error == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries))
+ ext4_should_retry_alloc(sb, &retries))
goto retry;
if (error == 0)
error = error2;
@@ -1311,7 +2464,7 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
last->e_value_offs = cpu_to_le16(new_offs);
@@ -1331,18 +2484,16 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
struct ext4_xattr_ibody_find *is = NULL;
struct ext4_xattr_block_find *bs = NULL;
char *buffer = NULL, *b_entry_name = NULL;
- size_t value_offs, value_size;
+ size_t value_size = le32_to_cpu(entry->e_value_size);
struct ext4_xattr_info i = {
.value = NULL,
.value_len = 0,
.name_index = entry->e_name_index,
+ .in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
int error;
- value_offs = le16_to_cpu(entry->e_value_offs);
- value_size = le32_to_cpu(entry->e_value_size);
-
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
buffer = kmalloc(value_size, GFP_NOFS);
@@ -1358,7 +2509,15 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
bs->bh = NULL;
/* Save the entry name and the entry value */
- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
+ if (error)
+ goto out;
+ } else {
+ size_t value_offs = le16_to_cpu(entry->e_value_offs);
+ memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ }
+
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
b_entry_name[entry->e_name_len] = '\0';
i.name = b_entry_name;
@@ -1372,11 +2531,10 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
goto out;
/* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(inode, &i, is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
if (error)
goto out;
- i.name = b_entry_name;
i.value = buffer;
i.value_len = value_size;
error = ext4_xattr_block_find(inode, &i, bs);
@@ -1420,9 +2578,10 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
last = IFIRST(header);
/* Find the entry best suited to be pushed into EA block */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- total_size =
- EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
- EXT4_XATTR_LEN(last->e_name_len);
+ total_size = EXT4_XATTR_LEN(last->e_name_len);
+ if (!last->e_value_inum)
+ total_size += EXT4_XATTR_SIZE(
+ le32_to_cpu(last->e_value_size));
if (total_size <= bfree &&
total_size < min_total_size) {
if (total_size + ifree < isize_diff) {
@@ -1441,8 +2600,10 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
}
entry_size = EXT4_XATTR_LEN(entry->e_name_len);
- total_size = entry_size +
- EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
+ total_size = entry_size;
+ if (!entry->e_value_inum)
+ total_size += EXT4_XATTR_SIZE(
+ le32_to_cpu(entry->e_value_size));
error = ext4_xattr_move_to_block(handle, inode, raw_inode,
entry);
if (error)
@@ -1571,51 +2732,172 @@ cleanup:
return error;
}
+#define EIA_INCR 16 /* must be 2^n */
+#define EIA_MASK (EIA_INCR - 1)
+/* Add the large xattr @inode into @ea_inode_array for deferred iput().
+ * If @ea_inode_array is new or full it will be grown and the old
+ * contents copied over.
+ */
+static int
+ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
+ struct inode *inode)
+{
+ if (*ea_inode_array == NULL) {
+ /*
+ * Start with 15 inodes, so it fits into a power-of-two size.
+ * If *ea_inode_array is NULL, this is essentially offsetof()
+ */
+ (*ea_inode_array) =
+ kmalloc(offsetof(struct ext4_xattr_inode_array,
+ inodes[EIA_MASK]),
+ GFP_NOFS);
+ if (*ea_inode_array == NULL)
+ return -ENOMEM;
+ (*ea_inode_array)->count = 0;
+ } else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
+ /* expand the array once all 15 + n * 16 slots are full */
+ struct ext4_xattr_inode_array *new_array = NULL;
+ int count = (*ea_inode_array)->count;
+
+ /* if new_array is NULL, this is essentially offsetof() */
+ new_array = kmalloc(
+ offsetof(struct ext4_xattr_inode_array,
+ inodes[count + EIA_INCR]),
+ GFP_NOFS);
+ if (new_array == NULL)
+ return -ENOMEM;
+ memcpy(new_array, *ea_inode_array,
+ offsetof(struct ext4_xattr_inode_array, inodes[count]));
+ kfree(*ea_inode_array);
+ *ea_inode_array = new_array;
+ }
+ (*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
+ return 0;
+}
/*
* ext4_xattr_delete_inode()
*
- * Free extended attribute resources associated with this inode. This
- * is called immediately before an inode is freed. We have exclusive
- * access to the inode.
+ * Free extended attribute resources associated with this inode. Traverse
+ * all entries and decrement reference on any xattr inodes associated with this
+ * inode. This is called immediately before an inode is freed. We have exclusive
+ * access to the inode. If an orphan inode is deleted it will also release its
+ * references on xattr block and xattr inodes.
*/
-void
-ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
+int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits)
{
struct buffer_head *bh = NULL;
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_iloc iloc = { .bh = NULL };
+ struct ext4_xattr_entry *entry;
+ int error;
- if (!EXT4_I(inode)->i_file_acl)
- goto cleanup;
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %llu read error",
- EXT4_I(inode)->i_file_acl);
+ error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
+ NULL /* bh */,
+ false /* dirty */,
+ false /* block_csum */);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
goto cleanup;
}
- if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
- BHDR(bh)->h_blocks != cpu_to_le32(1)) {
- EXT4_ERROR_INODE(inode, "bad block %llu",
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
+
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
+
+ error = ext4_get_inode_loc(inode, &iloc);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "inode loc (error %d)", error);
+ goto cleanup;
+ }
+
+ error = ext4_journal_get_write_access(handle, iloc.bh);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "write access (error %d)",
+ error);
+ goto cleanup;
+ }
+
+ header = IHDR(inode, ext4_raw_inode(&iloc));
+ if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC))
+ ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh,
+ IFIRST(header),
+ false /* block_csum */,
+ ea_inode_array,
+ extra_credits,
+ false /* skip_quota */);
}
- ext4_xattr_release_block(handle, inode, bh);
- EXT4_I(inode)->i_file_acl = 0;
+ if (EXT4_I(inode)->i_file_acl) {
+ bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
+ if (!bh) {
+ EXT4_ERROR_INODE(inode, "block %llu read error",
+ EXT4_I(inode)->i_file_acl);
+ error = -EIO;
+ goto cleanup;
+ }
+ error = ext4_xattr_check_block(inode, bh);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "bad block %llu (error %d)",
+ EXT4_I(inode)->i_file_acl, error);
+ goto cleanup;
+ }
+
+ if (ext4_has_feature_ea_inode(inode->i_sb)) {
+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry))
+ if (entry->e_value_inum)
+ ext4_xattr_inode_free_quota(inode,
+ le32_to_cpu(entry->e_value_size));
+
+ }
+
+ ext4_xattr_release_block(handle, inode, bh, ea_inode_array,
+ extra_credits);
+ /*
+ * Update i_file_acl value in the same transaction that releases
+ * block.
+ */
+ EXT4_I(inode)->i_file_acl = 0;
+ error = ext4_mark_inode_dirty(handle, inode);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
+ error);
+ goto cleanup;
+ }
+ }
+ error = 0;
cleanup:
+ brelse(iloc.bh);
brelse(bh);
+ return error;
+}
+
+void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
+{
+ int idx;
+
+ if (ea_inode_array == NULL)
+ return;
+
+ for (idx = 0; idx < ea_inode_array->count; ++idx)
+ iput(ea_inode_array->inodes[idx]);
+ kfree(ea_inode_array);
}
/*
- * ext4_xattr_cache_insert()
+ * ext4_xattr_block_cache_insert()
*
- * Create a new entry in the extended attribute cache, and insert
+ * Create a new entry in the extended attribute block cache, and insert
* it unless such an entry is already in the cache.
*
* Returns 0, or a negative error number on failure.
*/
static void
-ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
+ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
+ struct buffer_head *bh)
{
struct ext4_xattr_header *header = BHDR(bh);
__u32 hash = le32_to_cpu(header->h_hash);
@@ -1623,7 +2905,9 @@ ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
EXT4_XATTR_REFCOUNT_MAX;
int error;
- error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ if (!ea_block_cache)
+ return;
+ error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash,
bh->b_blocknr, reusable);
if (error) {
if (error == -EBUSY)
@@ -1655,11 +2939,11 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
entry1->e_name_index != entry2->e_name_index ||
entry1->e_name_len != entry2->e_name_len ||
entry1->e_value_size != entry2->e_value_size ||
+ entry1->e_value_inum != entry2->e_value_inum ||
memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
return 1;
- if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
- return -EFSCORRUPTED;
- if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
+ if (!entry1->e_value_inum &&
+ memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
(char *)header2 + le16_to_cpu(entry2->e_value_offs),
le32_to_cpu(entry1->e_value_size)))
return 1;
@@ -1673,7 +2957,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
}
/*
- * ext4_xattr_cache_find()
+ * ext4_xattr_block_cache_find()
*
* Find an identical extended attribute block.
*
@@ -1681,30 +2965,33 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
* not found or an error occurred.
*/
static struct buffer_head *
-ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
+ext4_xattr_block_cache_find(struct inode *inode,
+ struct ext4_xattr_header *header,
+ struct mb_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+ if (!ea_block_cache)
+ return NULL;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
- ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
+ ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
- bh = sb_bread(inode->i_sb, ce->e_block);
+ bh = sb_bread(inode->i_sb, ce->e_value);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
- (unsigned long) ce->e_block);
+ (unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
+ ce = mb_cache_entry_find_next(ea_block_cache, ce);
}
return NULL;
}
@@ -1717,30 +3004,22 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
*
* Compute the hash of an extended attribute.
*/
-static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
- struct ext4_xattr_entry *entry)
+static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
+ size_t value_count)
{
__u32 hash = 0;
- char *name = entry->e_name;
- int n;
- for (n = 0; n < entry->e_name_len; n++) {
+ while (name_len--) {
hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
*name++;
}
-
- if (entry->e_value_size != 0) {
- __le32 *value = (__le32 *)((char *)header +
- le16_to_cpu(entry->e_value_offs));
- for (n = (le32_to_cpu(entry->e_value_size) +
- EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
- hash = (hash << VALUE_HASH_SHIFT) ^
- (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
- le32_to_cpu(*value++);
- }
+ while (value_count--) {
+ hash = (hash << VALUE_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+ le32_to_cpu(*value++);
}
- entry->e_hash = cpu_to_le32(hash);
+ return cpu_to_le32(hash);
}
#undef NAME_HASH_SHIFT
@@ -1753,13 +3032,11 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
*
* Re-compute the extended attribute hash value after an entry has changed.
*/
-static void ext4_xattr_rehash(struct ext4_xattr_header *header,
- struct ext4_xattr_entry *entry)
+static void ext4_xattr_rehash(struct ext4_xattr_header *header)
{
struct ext4_xattr_entry *here;
__u32 hash = 0;
- ext4_xattr_hash_entry(header, entry);
here = ENTRY(header+1);
while (!IS_LAST_ENTRY(here)) {
if (!here->e_hash) {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 099c8b670ef5..0d2dde1fa87a 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -44,7 +44,7 @@ struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */
- __le32 e_value_block; /* disk block attribute is stored on (n/i) */
+ __le32 e_value_inum; /* inode in which the value is stored */
__le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */
@@ -69,6 +69,13 @@ struct ext4_xattr_entry {
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
+/*
+ * The minimum size of EA value when you start storing it in an external inode
+ * size of block - size of header - size of 1 entry - 4 null bytes
+*/
+#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
+ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
+
#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
@@ -77,10 +84,11 @@ struct ext4_xattr_entry {
#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
struct ext4_xattr_info {
- int name_index;
const char *name;
const void *value;
size_t value_len;
+ int name_index;
+ int in_inode;
};
struct ext4_xattr_search {
@@ -96,6 +104,11 @@ struct ext4_xattr_ibody_find {
struct ext4_iloc iloc;
};
+struct ext4_xattr_inode_array {
+ unsigned int count; /* # of used items in the array */
+ struct inode *inodes[0];
+};
+
extern const struct xattr_handler ext4_xattr_user_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;
@@ -139,8 +152,16 @@ extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
+extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
+ bool is_create, int *credits);
+extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ struct buffer_head *block_bh, size_t value_len,
+ bool is_create);
-extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_inode_array **array,
+ int extra_credits);
+extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -169,3 +190,11 @@ static inline int ext4_init_security(handle_t *handle, struct inode *inode,
return 0;
}
#endif
+
+#ifdef CONFIG_LOCKDEP
+extern void ext4_xattr_inode_set_class(struct inode *ea_inode);
+#else
+static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { }
+#endif
+
+extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b19be429d655..d818fd236787 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -10,13 +10,14 @@
/*
* Mbcache is a simple key-value store. Keys need not be unique, however
* key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete_block()).
+ * mb_cache_entry_delete()).
*
* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
- * They use hash of a block contents as a key and block number as a value.
- * That's why keys need not be unique (different xattr blocks may end up having
- * the same hash). However block number always uniquely identifies a cache
- * entry.
+ * Ext4 also uses it for deduplication of xattr values stored in inodes.
+ * They use hash of data as a key and provide a value that may represent a
+ * block or inode number. That's why keys need not be unique (hash of different
+ * data may be the same). However user provided value always uniquely
+ * identifies a cache entry.
*
* We provide functions for creation and removal of entries, search by key,
* and a special "delete entry with given key-value pair" operation. Fixed
@@ -62,15 +63,15 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
* @cache - cache where the entry should be created
* @mask - gfp mask with which the entry should be allocated
* @key - key of the entry
- * @block - block that contains data
- * @reusable - is the block reusable by other inodes?
+ * @value - value of the entry
+ * @reusable - is the entry reusable by others?
*
- * Creates entry in @cache with key @key and records that data is stored in
- * block @block. The function returns -EBUSY if entry with the same key
- * and for the same block already exists in cache. Otherwise 0 is returned.
+ * Creates entry in @cache with key @key and value @value. The function returns
+ * -EBUSY if entry with the same key and value already exists in cache.
+ * Otherwise 0 is returned.
*/
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
- sector_t block, bool reusable)
+ u64 value, bool reusable)
{
struct mb_cache_entry *entry, *dup;
struct hlist_bl_node *dup_node;
@@ -91,12 +92,12 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
/* One ref for hash, one ref returned */
atomic_set(&entry->e_refcnt, 1);
entry->e_key = key;
- entry->e_block = block;
+ entry->e_value = value;
entry->e_reusable = reusable;
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
- if (dup->e_key == key && dup->e_block == block) {
+ if (dup->e_key == key && dup->e_value == value) {
hlist_bl_unlock(head);
kmem_cache_free(mb_entry_cache, entry);
return -EBUSY;
@@ -187,13 +188,13 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
EXPORT_SYMBOL(mb_cache_entry_find_next);
/*
- * mb_cache_entry_get - get a cache entry by block number (and key)
+ * mb_cache_entry_get - get a cache entry by value (and key)
* @cache - cache we work with
- * @key - key of block number @block
- * @block - block number
+ * @key - key
+ * @value - value
*/
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
- sector_t block)
+ u64 value)
{
struct hlist_bl_node *node;
struct hlist_bl_head *head;
@@ -202,7 +203,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_block == block) {
+ if (entry->e_key == key && entry->e_value == value) {
atomic_inc(&entry->e_refcnt);
goto out;
}
@@ -214,15 +215,14 @@ out:
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete_block - remove information about block from cache
+/* mb_cache_entry_delete - remove a cache entry
* @cache - cache we work with
- * @key - key of block @block
- * @block - block number
+ * @key - key
+ * @value - value
*
- * Remove entry from cache @cache with key @key with data stored in @block.
+ * Remove entry from cache @cache with key @key and value @value.
*/
-void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
- sector_t block)
+void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
{
struct hlist_bl_node *node;
struct hlist_bl_head *head;
@@ -231,7 +231,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_block == block) {
+ if (entry->e_key == key && entry->e_value == value) {
/* We keep hash list reference to keep entry alive */
hlist_bl_del_init(&entry->e_hash_list);
hlist_bl_unlock(head);
@@ -248,7 +248,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
}
hlist_bl_unlock(head);
}
-EXPORT_SYMBOL(mb_cache_entry_delete_block);
+EXPORT_SYMBOL(mb_cache_entry_delete);
/* mb_cache_entry_touch - cache entry got used
* @cache - cache the entry belongs to
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 48813aeaab80..53a17496c5c5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1910,6 +1910,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{
qsize_t space, cur_space;
qsize_t rsv_space = 0;
+ qsize_t inode_usage = 1;
struct dquot *transfer_from[MAXQUOTAS] = {};
int cnt, ret = 0;
char is_valid[MAXQUOTAS] = {};
@@ -1919,6 +1920,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
if (IS_NOQUOTA(inode))
return 0;
+
+ if (inode->i_sb->dq_op->get_inode_usage) {
+ ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage);
+ if (ret)
+ return ret;
+ }
+
/* Initialize the arrays */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
warn_to[cnt].w_type = QUOTA_NL_NOWARN;
@@ -1946,7 +1954,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
continue;
is_valid[cnt] = 1;
transfer_from[cnt] = i_dquot(inode)[cnt];
- ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
+ ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]);
if (ret)
goto over_quota;
ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
@@ -1963,7 +1971,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
int wtype;
- wtype = info_idq_free(transfer_from[cnt], 1);
+ wtype = info_idq_free(transfer_from[cnt], inode_usage);
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_inodes[cnt],
transfer_from[cnt], wtype);
@@ -1971,13 +1979,13 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_space[cnt],
transfer_from[cnt], wtype);
- dquot_decr_inodes(transfer_from[cnt], 1);
+ dquot_decr_inodes(transfer_from[cnt], inode_usage);
dquot_decr_space(transfer_from[cnt], cur_space);
dquot_free_reserved_space(transfer_from[cnt],
rsv_space);
}
- dquot_incr_inodes(transfer_to[cnt], 1);
+ dquot_incr_inodes(transfer_to[cnt], inode_usage);
dquot_incr_space(transfer_to[cnt], cur_space);
dquot_resv_space(transfer_to[cnt], rsv_space);