aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/compression.c7
-rw-r--r--fs/btrfs/ctree.c2
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent-tree.c27
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/inode.c18
-rw-r--r--fs/btrfs/ioctl.c26
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/relocation.c1
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c4
-rw-r--r--fs/btrfs/tests/qgroup-tests.c4
-rw-r--r--fs/btrfs/tree-checker.c20
-rw-r--r--fs/btrfs/tree-log.c52
-rw-r--r--fs/btrfs/uuid-tree.c2
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c25
-rw-r--r--fs/cifs/cifsglob.h3
-rw-r--r--fs/cifs/cifssmb.c3
-rw-r--r--fs/cifs/readdir.c63
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2inode.c1
-rw-r--r--fs/cifs/smb2ops.c19
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ext4/block_validity.c6
-rw-r--r--fs/ext4/dir.c6
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode-test.c2
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c36
-rw-r--r--fs/ext4/super.c143
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c7
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namespace.c12
-rw-r--r--fs/notify/fsnotify.c4
-rw-r--r--fs/nsfs.c3
-rw-r--r--fs/ocfs2/dlmglue.c1
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/overlayfs/copy_up.c53
-rw-r--r--fs/overlayfs/dir.c2
-rw-r--r--fs/overlayfs/export.c80
-rw-r--r--fs/overlayfs/inode.c8
-rw-r--r--fs/overlayfs/namei.c52
-rw-r--r--fs/overlayfs/overlayfs.h34
-rw-r--r--fs/overlayfs/ovl_entry.h2
-rw-r--r--fs/overlayfs/super.c24
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/posix_acl.c7
-rw-r--r--fs/proc/stat.c4
-rw-r--r--fs/pstore/ram.c13
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/quota/dquot.c1
-rw-r--r--fs/super.c4
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c18
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c5
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c21
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h29
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c64
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c96
-rw-r--r--fs/xfs/xfs_bmap_util.c12
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_mount.c168
-rw-r--r--fs/xfs/xfs_trace.h21
71 files changed, 882 insertions, 411 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ee834ef7beb4..43e1660f450f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -447,7 +447,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT;
- bio_associate_blkg_from_css(bio, blkcg_css);
+ kthread_associate_blkcg(blkcg_css);
}
refcount_set(&cb->pending_bios, 1);
@@ -491,6 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
+ if (blkcg_css)
+ bio->bi_opf |= REQ_CGROUP_PUNT;
bio_add_page(bio, page, PAGE_SIZE, 0);
}
if (bytes_left < PAGE_SIZE) {
@@ -517,6 +519,9 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
+ if (blkcg_css)
+ kthread_associate_blkcg(NULL);
+
return 0;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5b6e86aaf2e1..24658b5a5787 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -379,7 +379,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
tm = rb_entry(node, struct tree_mod_elem, node);
- if (tm->seq > min_seq)
+ if (tm->seq >= min_seq)
continue;
rb_erase(node, tm_root);
kfree(tm);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b2e8fd8a8e59..54efb21c2727 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2787,7 +2787,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
/* file-item.c */
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
+ struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u8 *dst);
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 153f71a5bba9..274318e9114e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1869,8 +1869,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
if (head->is_data) {
- ret = btrfs_del_csums(trans, fs_info, head->bytenr,
- head->num_bytes);
+ ret = btrfs_del_csums(trans, fs_info->csum_root,
+ head->bytenr, head->num_bytes);
}
}
@@ -3175,7 +3175,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (is_data) {
- ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
+ ret = btrfs_del_csums(trans, info->csum_root, bytenr,
+ num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3799,6 +3800,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
u64 flags, int delalloc)
{
int ret = 0;
+ int cache_block_group_error = 0;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group *block_group = NULL;
struct find_free_extent_ctl ffe_ctl = {0};
@@ -3958,7 +3960,20 @@ have_block_group:
if (unlikely(!ffe_ctl.cached)) {
ffe_ctl.have_caching_bg = true;
ret = btrfs_cache_block_group(block_group, 0);
- BUG_ON(ret < 0);
+
+ /*
+ * If we get ENOMEM here or something else we want to
+ * try other block groups, because it may not be fatal.
+ * However if we can't find anything else we need to
+ * save our return here so that we return the actual
+ * error that caused problems, not ENOSPC.
+ */
+ if (ret < 0) {
+ if (!cache_block_group_error)
+ cache_block_group_error = ret;
+ ret = 0;
+ goto loop;
+ }
ret = 0;
}
@@ -4045,7 +4060,7 @@ loop:
if (ret > 0)
goto search;
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && !cache_block_group_error) {
/*
* Use ffe_ctl->total_free_space as fallback if we can't find
* any contiguous hole.
@@ -4056,6 +4071,8 @@ loop:
space_info->max_extent_size = ffe_ctl.max_extent_size;
spin_unlock(&space_info->lock);
ins->offset = ffe_ctl.max_extent_size;
+ } else if (ret == -ENOSPC) {
+ ret = cache_block_group_error;
}
return ret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb8bd0258360..2f4802f405a2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5074,12 +5074,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
eb = alloc_dummy_extent_buffer(fs_info, start);
if (!eb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
again:
ret = radix_tree_preload(GFP_NOFS);
- if (ret)
+ if (ret) {
+ exists = ERR_PTR(ret);
goto free_eb;
+ }
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
start >> PAGE_SHIFT, eb);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 3270a40b0777..b1bfdc5c1387 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -590,9 +590,9 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
* range of bytes.
*/
int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+ struct btrfs_root *root, u64 bytenr, u64 len)
{
- struct btrfs_root *root = fs_info->csum_root;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_path *path;
struct btrfs_key key;
u64 end_byte = bytenr + len;
@@ -602,6 +602,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int blocksize_bits = fs_info->sb->s_blocksize_bits;
+ ASSERT(root == fs_info->csum_root ||
+ root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0cb43b682789..8d47c76b7bd1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2599,8 +2599,8 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
}
}
- if (clone_info) {
- u64 clone_len = drop_end - cur_offset;
+ if (clone_info && drop_end > clone_info->file_offset) {
+ u64 clone_len = drop_end - clone_info->file_offset;
ret = btrfs_insert_clone_extent(trans, inode, path,
clone_info, clone_len);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 56032c518b26..5509c41a4f43 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1479,10 +1479,10 @@ next_slot:
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
/*
- * If extent we got ends before our range starts, skip
- * to next extent
+ * If the extent we got ends before our current offset,
+ * skip to the next extent.
*/
- if (extent_end <= start) {
+ if (extent_end <= cur_offset) {
path->slots[0]++;
goto next_slot;
}
@@ -5728,7 +5728,6 @@ static void inode_tree_add(struct inode *inode)
static void inode_tree_del(struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int empty = 0;
@@ -5741,7 +5740,6 @@ static void inode_tree_del(struct inode *inode)
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
- synchronize_srcu(&fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
@@ -9556,9 +9554,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
btrfs_init_log_ctx(&ctx_dest, new_inode);
/* close the race window with snapshot create/destroy ioctl */
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
- down_read(&fs_info->subvol_sem);
- if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+ if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&fs_info->subvol_sem);
/*
@@ -9792,9 +9789,8 @@ out_fail:
ret = ret ? ret : ret2;
}
out_notrans:
- if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
- up_read(&fs_info->subvol_sem);
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+ if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
ASSERT(list_empty(&ctx_root.list));
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a1ee0b775e65..18e328ce4b54 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -704,11 +704,17 @@ static noinline int create_subvol(struct inode *dir,
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto fail;
+ }
ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto fail;
+ }
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
@@ -3720,24 +3726,18 @@ process_slot:
ret = 0;
if (last_dest_end < destoff + len) {
- struct btrfs_clone_extent_info clone_info = { 0 };
/*
- * We have an implicit hole (NO_HOLES feature is enabled) that
- * fully or partially overlaps our cloning range at its end.
+ * We have an implicit hole that fully or partially overlaps our
+ * cloning range at its end. This means that we either have the
+ * NO_HOLES feature enabled or the implicit hole happened due to
+ * mixing buffered and direct IO writes against this file.
*/
btrfs_release_path(path);
path->leave_spinning = 0;
- /*
- * We are dealing with a hole and our clone_info already has a
- * disk_offset of 0, we only need to fill the data length and
- * file offset.
- */
- clone_info.data_len = destoff + len - last_dest_end;
- clone_info.file_offset = last_dest_end;
ret = btrfs_punch_hole_range(inode, path,
last_dest_end, destoff + len - 1,
- &clone_info, &trans);
+ NULL, &trans);
if (ret)
goto out;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93aeb2e539a4..d4282e12f2a6 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3232,12 +3232,12 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
btrfs_warn(fs_info,
- "qgroup rescan init failed, qgroup is not enabled");
+ "qgroup rescan init failed, qgroup rescan is not queued");
ret = -EINVAL;
} else if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_ON)) {
btrfs_warn(fs_info,
- "qgroup rescan init failed, qgroup rescan is not queued");
+ "qgroup rescan init failed, qgroup is not enabled");
ret = -EINVAL;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d897a8e5e430..c58245797f30 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4552,6 +4552,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
+ list_add_tail(&reloc_root->root_list, &reloc_roots);
goto out_free;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index ae2db5eb1549..091e5bc8c7ea 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7084,12 +7084,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
spin_unlock(&send_root->root_item_lock);
/*
- * This is done when we lookup the root, it should already be complete
- * by the time we get here.
- */
- WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
-
- /*
* Userspace tools do the checks and warn the user if it's
* not RO.
*/
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 1a846bf6e197..914eea5ba6a7 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -452,9 +452,9 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
root->fs_info->tree_root = root;
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
- if (!root->node) {
+ if (IS_ERR(root->node)) {
test_std_err(TEST_ALLOC_EXTENT_BUFFER);
- ret = -ENOMEM;
+ ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 09aaca1efd62..ac035a6fa003 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -484,9 +484,9 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
* *cough*backref walking code*cough*
*/
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
- if (!root->node) {
+ if (IS_ERR(root->node)) {
test_err("couldn't allocate dummy buffer");
- ret = -ENOMEM;
+ ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 493d4d9e0f79..97f3520b8d98 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -227,7 +227,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
*/
if (item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START) {
file_extent_err(leaf, slot,
- "invalid item size, have %u expect [%lu, %u)",
+ "invalid item size, have %u expect [%zu, %u)",
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
SZ_4K);
return -EUCLEAN;
@@ -332,7 +332,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
}
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
- int slot)
+ int slot, struct btrfs_key *prev_key)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
u32 sectorsize = fs_info->sectorsize;
@@ -356,6 +356,20 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
btrfs_item_size_nr(leaf, slot), csumsize);
return -EUCLEAN;
}
+ if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
+ u64 prev_csum_end;
+ u32 prev_item_size;
+
+ prev_item_size = btrfs_item_size_nr(leaf, slot - 1);
+ prev_csum_end = (prev_item_size / csumsize) * sectorsize;
+ prev_csum_end += prev_key->offset;
+ if (prev_csum_end > key->offset) {
+ generic_err(leaf, slot - 1,
+"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
+ prev_csum_end, key->offset);
+ return -EUCLEAN;
+ }
+ }
return 0;
}
@@ -1355,7 +1369,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
ret = check_extent_data_item(leaf, key, slot, prev_key);
break;
case BTRFS_EXTENT_CSUM_KEY:
- ret = check_csum_item(leaf, key, slot);
+ ret = check_csum_item(leaf, key, slot, prev_key);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6f757361db53..d3f115909ff0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -808,7 +808,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_del_csums(trans, fs_info,
+ ret = btrfs_del_csums(trans,
+ fs_info->csum_root,
sums->bytenr,
sums->len);
if (!ret)
@@ -3909,6 +3910,28 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
return 0;
}
+static int log_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_root *log_root,
+ struct btrfs_ordered_sum *sums)
+{
+ int ret;
+
+ /*
+ * Due to extent cloning, we might have logged a csum item that covers a
+ * subrange of a cloned extent, and later we can end up logging a csum
+ * item for a larger subrange of the same extent or the entire range.
+ * This would leave csum items in the log tree that cover the same range
+ * and break the searches for checksums in the log tree, resulting in
+ * some checksums missing in the fs/subvolume tree. So just delete (or
+ * trim and adjust) any existing csum items in the log for this range.
+ */
+ ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
+ if (ret)
+ return ret;
+
+ return btrfs_csum_file_blocks(trans, log_root, sums);
+}
+
static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *dst_path,
@@ -4054,7 +4077,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_csum_file_blocks(trans, log, sums);
+ ret = log_csums(trans, log, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4274,7 +4297,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_csum_file_blocks(trans, log_root, sums);
+ ret = log_csums(trans, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -6294,9 +6317,28 @@ again:
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
+
+ /*
+ * We didn't find the subvol, likely because it was
+ * deleted. This is ok, simply skip this log and go to
+ * the next one.
+ *
+ * We need to exclude the root because we can't have
+ * other log replays overwriting this log as we'll read
+ * it back in a few more times. This will keep our
+ * block from being modified, and we'll just bail for
+ * each subsequent pass.
+ */
+ if (ret == -ENOENT)
+ ret = btrfs_pin_extent_for_log_replay(fs_info,
+ log->node->start,
+ log->node->len);
free_extent_buffer(log->node);
free_extent_buffer(log->commit_root);
kfree(log);
+
+ if (!ret)
+ goto next;
btrfs_handle_fs_error(fs_info, ret,
"Couldn't read target root for tree log recovery.");
goto error;
@@ -6328,7 +6370,6 @@ again:
&root->highest_objectid);
}
- key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
free_extent_buffer(log->commit_root);
@@ -6336,9 +6377,10 @@ again:
if (ret)
goto error;
-
+next:
if (found_key.offset == 0)
break;
+ key.offset = found_key.offset - 1;
}
btrfs_release_path(path);
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 91caab63bdf5..76b84f2397b1 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -324,6 +324,8 @@ again_search_slot:
}
if (ret < 0 && ret != -ENOENT)
goto out;
+ key.offset++;
+ goto again_search_slot;
}
item_size -= sizeof(subid_le);
offset += sizeof(subid_le);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d8e5560db285..a6d3f08bfff3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -61,7 +61,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID1C3] = {
.sub_stripes = 1,
.dev_stripes = 1,
- .devs_max = 0,
+ .devs_max = 3,
.devs_min = 3,
.tolerated_failures = 2,
.devs_increment = 3,
@@ -73,7 +73,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID1C4] = {
.sub_stripes = 1,
.dev_stripes = 1,
- .devs_max = 0,
+ .devs_max = 4,
.devs_min = 4,
.tolerated_failures = 3,
.devs_increment = 4,
diff --git a/fs/buffer.c b/fs/buffer.c
index d8c7242426bb..e94a6619464c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3034,8 +3034,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
void guard_bio_eod(int op, struct bio *bio)
{
sector_t maxsector;
- struct bio_vec *bvec = bio_last_bvec_all(bio);
- unsigned truncated_bytes;
struct hd_struct *part;
rcu_read_lock();
@@ -3061,28 +3059,7 @@ void guard_bio_eod(int op, struct bio *bio)
if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
return;
- /* Uhhuh. We've got a bio that straddles the device size! */
- truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
-
- /*
- * The bio contains more than one segment which spans EOD, just return
- * and let IO layer turn it into an EIO
- */
- if (truncated_bytes > bvec->bv_len)
- return;
-
- /* Truncate the bio.. */
- bio->bi_iter.bi_size -= truncated_bytes;
- bvec->bv_len -= truncated_bytes;
-
- /* ..and clear the end of the buffer for reads */
- if (op == REQ_OP_READ) {
- struct bio_vec bv;
-
- mp_bvec_last_segment(bvec, &bv);
- zero_user(bv.bv_page, bv.bv_offset + bv.bv_len,
- truncated_bytes);
- }
+ bio_truncate(bio, maxsector << 9);
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index fd0262ce5ad5..40705e862451 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1061,7 +1061,7 @@ cap_unix(struct cifs_ses *ses)
struct cached_fid {
bool is_valid:1; /* Do we have a useable root fid */
bool file_all_info_is_valid:1;
-
+ bool has_lease:1;
struct kref refcount;
struct cifs_fid *fid;
struct mutex fid_mutex;
@@ -1693,6 +1693,7 @@ struct cifs_fattr {
struct timespec64 cf_atime;
struct timespec64 cf_mtime;
struct timespec64 cf_ctime;
+ u32 cf_cifstag;
};
static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4f554f019a98..cc86a67225d1 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -42,6 +42,7 @@
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
+#include "smb2proto.h"
#include "fscache.h"
#include "smbdirect.h"
#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -112,6 +113,8 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
mutex_lock(&tcon->crfid.fid_mutex);
tcon->crfid.is_valid = false;
+ /* cached handle is not valid, so SMB2_CLOSE won't be sent below */
+ close_shroot_lease_locked(&tcon->crfid);
memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid));
mutex_unlock(&tcon->crfid.fid_mutex);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 3925a7bfc74d..d17587c2c4ab 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -139,6 +139,28 @@ retry:
dput(dentry);
}
+static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
+{
+ if (!(fattr->cf_cifsattrs & ATTR_REPARSE))
+ return false;
+ /*
+ * The DFS tags should be only intepreted by server side as per
+ * MS-FSCC 2.1.2.1, but let's include them anyway.
+ *
+ * Besides, if cf_cifstag is unset (0), then we still need it to be
+ * revalidated to know exactly what reparse point it is.
+ */
+ switch (fattr->cf_cifstag) {
+ case IO_REPARSE_TAG_DFS:
+ case IO_REPARSE_TAG_DFSR:
+ case IO_REPARSE_TAG_SYMLINK:
+ case IO_REPARSE_TAG_NFS:
+ case 0:
+ return true;
+ }
+ return false;
+}
+
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -158,7 +180,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
* is a symbolic link, DFS referral or a reparse point with a direct
* access like junctions, deduplicated files, NFS symlinks.
*/
- if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ if (reparse_file_needs_reval(fattr))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
/* non-unix readdir doesn't provide nlink */
@@ -194,19 +216,37 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
}
}
+static void __dir_info_to_fattr(struct cifs_fattr *fattr, const void *info)
+{
+ const FILE_DIRECTORY_INFO *fi = info;
+
+ memset(fattr, 0, sizeof(*fattr));
+ fattr->cf_cifsattrs = le32_to_cpu(fi->ExtFileAttributes);
+ fattr->cf_eof = le64_to_cpu(fi->EndOfFile);
+ fattr->cf_bytes = le64_to_cpu(fi->AllocationSize);
+ fattr->cf_createtime = le64_to_cpu(fi->CreationTime);
+ fattr->cf_atime = cifs_NTtimeToUnix(fi->LastAccessTime);
+ fattr->cf_ctime = cifs_NTtimeToUnix(fi->ChangeTime);
+ fattr->cf_mtime = cifs_NTtimeToUnix(fi->LastWriteTime);
+}
+
void
cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
struct cifs_sb_info *cifs_sb)
{
- memset(fattr, 0, sizeof(*fattr));
- fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
- fattr->cf_eof = le64_to_cpu(info->EndOfFile);
- fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
- fattr->cf_createtime = le64_to_cpu(info->CreationTime);
- fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
- fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
- fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+ __dir_info_to_fattr(fattr, info);
+ cifs_fill_common_info(fattr, cifs_sb);
+}
+static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
+ SEARCH_ID_FULL_DIR_INFO *info,
+ struct cifs_sb_info *cifs_sb)
+{
+ __dir_info_to_fattr(fattr, info);
+
+ /* See MS-FSCC 2.4.18 FileIdFullDirectoryInformation */
+ if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ fattr->cf_cifstag = le32_to_cpu(info->EaSize);
cifs_fill_common_info(fattr, cifs_sb);
}
@@ -755,6 +795,11 @@ static int cifs_filldir(char *find_entry, struct file *file,
(FIND_FILE_STANDARD_INFO *)find_entry,
cifs_sb);
break;
+ case SMB_FIND_FILE_ID_FULL_DIR_INFO:
+ cifs_fulldir_info_to_fattr(&fattr,
+ (SEARCH_ID_FULL_DIR_INFO *)find_entry,
+ cifs_sb);
+ break;
default:
cifs_dir_info_to_fattr(&fattr,
(FILE_DIRECTORY_INFO *)find_entry,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 8b0b512c5792..afe1f03aabe3 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -67,7 +67,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
goto out;
- if (oparms->tcon->use_resilient) {
+ if (oparms->tcon->use_resilient) {
/* default timeout is 0, servers pick default (120 seconds) */
nr_ioctl_req.Timeout =
cpu_to_le32(oparms->tcon->handle_timeout);
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 18c7a33adceb..5ef5e97a6d13 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -95,6 +95,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
goto finished;
}
+ memset(&oparms, 0, sizeof(struct cifs_open_parms));
oparms.tcon = tcon;
oparms.desired_access = desired_access;
oparms.disposition = create_disposition;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a5c96bc522cb..6250370c1170 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -616,6 +616,7 @@ smb2_close_cached_fid(struct kref *ref)
cfid->fid->volatile_fid);
cfid->is_valid = false;
cfid->file_all_info_is_valid = false;
+ cfid->has_lease = false;
}
}
@@ -626,13 +627,28 @@ void close_shroot(struct cached_fid *cfid)
mutex_unlock(&cfid->fid_mutex);
}
+void close_shroot_lease_locked(struct cached_fid *cfid)
+{
+ if (cfid->has_lease) {
+ cfid->has_lease = false;
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
+ }
+}
+
+void close_shroot_lease(struct cached_fid *cfid)
+{
+ mutex_lock(&cfid->fid_mutex);
+ close_shroot_lease_locked(cfid);
+ mutex_unlock(&cfid->fid_mutex);
+}
+
void
smb2_cached_lease_break(struct work_struct *work)
{
struct cached_fid *cfid = container_of(work,
struct cached_fid, lease_break);
- close_shroot(cfid);
+ close_shroot_lease(cfid);
}
/*
@@ -773,6 +789,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
/* BB TBD check to see if oplock level check can be removed below */
if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
kref_get(&tcon->crfid.refcount);
+ tcon->crfid.has_lease = true;
smb2_parse_contexts(server, o_rsp,
&oparms.fid->epoch,
oparms.fid->lease_key, &oplock, NULL);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0ab6b1200288..9434f6dd8df3 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1847,7 +1847,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
return 0;
- close_shroot(&tcon->crfid);
+ close_shroot_lease(&tcon->crfid);
rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req,
&total_len);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index a18272c987fe..27d29f2eb6c8 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -70,6 +70,8 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server,
extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *pfid);
extern void close_shroot(struct cached_fid *cfid);
+extern void close_shroot_lease(struct cached_fid *cfid);
+extern void close_shroot_lease_locked(struct cached_fid *cfid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0ec4f270139f..00b4d15bb811 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -39,6 +39,8 @@
#include <linux/atomic.h>
#include <linux/prefetch.h>
+#include "internal.h"
+
/*
* How many user pages to map in one call to get_user_pages(). This determines
* the size of a structure in the slab cache
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d31b6c72b476..dc1a1d5d825b 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -35,11 +35,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
- cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index d4d4fdfac1a6..1ee04e76bbe0 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -133,10 +133,13 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
{
struct rb_node *node;
struct ext4_system_zone *entry;
+ struct ext4_system_blocks *system_blks;
int first = 1;
printk(KERN_INFO "System zones: ");
- node = rb_first(&sbi->system_blks->root);
+ rcu_read_lock();
+ system_blks = rcu_dereference(sbi->system_blks);
+ node = rb_first(&system_blks->root);
while (node) {
entry = rb_entry(node, struct ext4_system_zone, node);
printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
@@ -144,6 +147,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
first = 0;
node = rb_next(node);
}
+ rcu_read_unlock();
printk(KERN_CONT "\n");
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9fdd2b269d61..9f00fc0bf21d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -72,6 +72,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
+ const int next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -79,8 +80,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(((char *) de - buf) + rlen > size))
+ else if (unlikely(next_offset > size))
error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - EXT4_DIR_REC_LEN(1) &&
+ next_offset != size))
+ error_msg = "directory entry too close to block end";
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index dc333e8e51e8..8ca4a23129aa 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -921,8 +921,8 @@ repeat_in_this_group:
if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
- handle_type, nblocks,
- 0, 0);
+ handle_type, nblocks, 0,
+ ext4_trans_default_revoke_credits(sb));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext4_std_error(sb, err);
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
index 92a9da1774aa..bbce1c328d85 100644
--- a/fs/ext4/inode-test.c
+++ b/fs/ext4/inode-test.c
@@ -25,7 +25,7 @@
* For constructing the negative timestamp lower bound value.
* binary: 10000000 00000000 00000000 00000000
*/
-#define LOWER_MSB_1 (-0x80000000L)
+#define LOWER_MSB_1 (-(UPPER_MSB_0) - 1L) /* avoid overflow */
/*
* For constructing the negative timestamp upper bound value.
* binary: 11111111 11111111 11111111 11111111
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 28f28de0c1b6..629a25d999f0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5692,7 +5692,7 @@ int ext4_expand_extra_isize(struct inode *inode,
error = ext4_journal_get_write_access(handle, iloc->bh);
if (error) {
brelse(iloc->bh);
- goto out_stop;
+ goto out_unlock;
}
error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc,
@@ -5702,8 +5702,8 @@ int ext4_expand_extra_isize(struct inode *inode,
if (!error)
error = rc;
+out_unlock:
ext4_write_unlock_xattr(inode, &no_expand);
-out_stop:
ext4_journal_stop(handle);
return error;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a856997d87b5..1cb42d940784 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2164,7 +2164,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
+#ifdef CONFIG_UNICODE
struct ext4_sb_info *sbi;
+#endif
struct ext4_filename fname;
int retval;
int dx_fallback=0;
@@ -2176,12 +2178,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
csum_size = sizeof(struct ext4_dir_entry_tail);
sb = dir->i_sb;
- sbi = EXT4_SB(sb);
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
#ifdef CONFIG_UNICODE
+ sbi = EXT4_SB(sb);
if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
return -EINVAL;
@@ -2822,7 +2824,7 @@ bool ext4_empty_dir(struct inode *inode)
{
unsigned int offset;
struct buffer_head *bh;
- struct ext4_dir_entry_2 *de, *de1;
+ struct ext4_dir_entry_2 *de;
struct super_block *sb;
if (ext4_has_inline_data(inode)) {
@@ -2847,19 +2849,25 @@ bool ext4_empty_dir(struct inode *inode)
return true;
de = (struct ext4_dir_entry_2 *) bh->b_data;
- de1 = ext4_next_entry(de, sb->s_blocksize);
- if (le32_to_cpu(de->inode) != inode->i_ino ||
- le32_to_cpu(de1->inode) == 0 ||
- strcmp(".", de->name) || strcmp("..", de1->name)) {
- ext4_warning_inode(inode, "directory missing '.' and/or '..'");
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
+ 0) ||
+ le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
+ ext4_warning_inode(inode, "directory missing '.'");
+ brelse(bh);
+ return true;
+ }
+ offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
+ de = ext4_next_entry(de, sb->s_blocksize);
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
+ offset) ||
+ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+ ext4_warning_inode(inode, "directory missing '..'");
brelse(bh);
return true;
}
- offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
- ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
- de = ext4_next_entry(de1, sb->s_blocksize);
+ offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
while (offset < inode->i_size) {
- if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+ if (!(offset & (sb->s_blocksize - 1))) {
unsigned int lblock;
brelse(bh);
lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
@@ -2870,12 +2878,11 @@ bool ext4_empty_dir(struct inode *inode)
}
if (IS_ERR(bh))
return true;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
}
+ de = (struct ext4_dir_entry_2 *) (bh->b_data +
+ (offset & (sb->s_blocksize - 1)));
if (ext4_check_dir_entry(inode, NULL, de, bh,
bh->b_data, bh->b_size, offset)) {
- de = (struct ext4_dir_entry_2 *)(bh->b_data +
- sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
continue;
}
@@ -2884,7 +2891,6 @@ bool ext4_empty_dir(struct inode *inode)
return false;
}
offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
- de = ext4_next_entry(de, sb->s_blocksize);
}
brelse(bh);
return true;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1d82b56d9b11..2937a8873fe1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1900,6 +1900,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_commit_interval = HZ * arg;
} else if (token == Opt_debug_want_extra_isize) {
+ if ((arg & 1) ||
+ (arg < 4) ||
+ (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid want_extra_isize %d", arg);
+ return -1;
+ }
sbi->s_want_extra_isize = arg;
} else if (token == Opt_max_batch_time) {
sbi->s_max_batch_time = arg;
@@ -3554,40 +3561,6 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
-static void ext4_clamp_want_extra_isize(struct super_block *sb)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- unsigned def_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
-
- if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) {
- sbi->s_want_extra_isize = 0;
- return;
- }
- if (sbi->s_want_extra_isize < 4) {
- sbi->s_want_extra_isize = def_extra_isize;
- if (ext4_has_feature_extra_isize(sb)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
- }
- }
- /* Check if enough inode space is available */
- if ((sbi->s_want_extra_isize > sbi->s_inode_size) ||
- (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size)) {
- sbi->s_want_extra_isize = def_extra_isize;
- ext4_msg(sb, KERN_INFO,
- "required extra inode space not available");
- }
-}
-
static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
@@ -3795,6 +3768,68 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
+ sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
+ sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
+ } else {
+ sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
+ sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+ if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+ ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+ sbi->s_first_ino);
+ goto failed_mount;
+ }
+ if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
+ (!is_power_of_2(sbi->s_inode_size)) ||
+ (sbi->s_inode_size > blocksize)) {
+ ext4_msg(sb, KERN_ERR,
+ "unsupported inode size: %d",
+ sbi->s_inode_size);
+ goto failed_mount;
+ }
+ /*
+ * i_atime_extra is the last extra field available for
+ * [acm]times in struct ext4_inode. Checking for that
+ * field should suffice to ensure we have extra space
+ * for all three.
+ */
+ if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
+ sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
+ sb->s_time_gran = 1;
+ sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ sb->s_time_gran = NSEC_PER_SEC;
+ sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ sb->s_time_min = EXT4_TIMESTAMP_MIN;
+ }
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (ext4_has_feature_extra_isize(sb)) {
+ unsigned v, max = (sbi->s_inode_size -
+ EXT4_GOOD_OLD_INODE_SIZE);
+
+ v = le16_to_cpu(es->s_want_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_want_extra_isize: %d", v);
+ goto failed_mount;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+
+ v = le16_to_cpu(es->s_min_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_min_extra_isize: %d", v);
+ goto failed_mount;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+ }
+ }
+
if (sbi->s_es->s_mount_opts[0]) {
char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
sizeof(sbi->s_es->s_mount_opts),
@@ -4033,42 +4068,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
has_huge_files);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
- if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
- sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
- sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
- } else {
- sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
- sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
- if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
- ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
- sbi->s_first_ino);
- goto failed_mount;
- }
- if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
- (!is_power_of_2(sbi->s_inode_size)) ||
- (sbi->s_inode_size > blocksize)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported inode size: %d",
- sbi->s_inode_size);
- goto failed_mount;
- }
- /*
- * i_atime_extra is the last extra field available for [acm]times in
- * struct ext4_inode. Checking for that field should suffice to ensure
- * we have extra space for all three.
- */
- if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
- sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
- sb->s_time_gran = 1;
- sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
- } else {
- sb->s_time_gran = NSEC_PER_SEC;
- sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
- }
-
- sb->s_time_min = EXT4_TIMESTAMP_MIN;
- }
-
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
if (ext4_has_feature_64bit(sb)) {
if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -4517,8 +4516,6 @@ no_journal:
} else if (ret)
goto failed_mount4a;
- ext4_clamp_want_extra_isize(sb);
-
ext4_set_resv_clusters(sb);
err = ext4_setup_system_zone(sb);
@@ -5306,8 +5303,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- ext4_clamp_want_extra_isize(sb);
-
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d5c2a3158610..a66e425884d1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1498,8 +1498,10 @@ static int __init init_hugetlbfs_fs(void)
/* other hstates are optional */
i = 0;
for_each_hstate(h) {
- if (i == default_hstate_idx)
+ if (i == default_hstate_idx) {
+ i++;
continue;
+ }
mnt = mount_one_hugetlbfs(h);
if (IS_ERR(mnt))
diff --git a/fs/inode.c b/fs/inode.c
index fef457a42882..96d62d97694e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -676,6 +676,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
struct inode *inode, *next;
LIST_HEAD(dispose);
+again:
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
@@ -698,6 +699,12 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
+ if (need_resched()) {
+ spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
+ dispose_list(&dispose);
+ goto again;
+ }
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/locks.c b/fs/locks.c
index 6970f55daf54..44b6da032842 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
}
if (inode) {
/* userspace relies on this representation of dev_t */
- seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
+ seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
} else {
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fd0c8bcb8c1..5e1bf611a9eb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1728,7 +1728,7 @@ static bool is_mnt_ns_file(struct dentry *dentry)
dentry->d_fsdata == &mntns_operations;
}
-struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
+static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
@@ -3325,8 +3325,8 @@ struct dentry *mount_subtree(struct vfsmount *m, const char *name)
}
EXPORT_SYMBOL(mount_subtree);
-int ksys_mount(const char __user *dev_name, const char __user *dir_name,
- const char __user *type, unsigned long flags, void __user *data)
+SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
+ char __user *, type, unsigned long, flags, void __user *, data)
{
int ret;
char *kernel_type;
@@ -3359,12 +3359,6 @@ out_type:
return ret;
}
-SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
- char __user *, type, unsigned long, flags, void __user *, data)
-{
- return ksys_mount(dev_name, dir_name, type, flags, data);
-}
-
/*
* Create a kernel mount representation for a new, prepared superblock
* (specified by fs_fd) and attach to an open_tree-like file descriptor.
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 3e77b728a22b..46f225580009 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -57,6 +57,9 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
* doing an __iget/iput with SB_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
+ * However, we should have been called /after/ evict_inodes
+ * removed all zero refcount inodes, in any case. Test to
+ * be sure.
*/
if (!atomic_read(&inode->i_count)) {
spin_unlock(&inode->i_lock);
@@ -77,6 +80,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
iput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index a0431642c6b5..f75767bd623a 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -3,6 +3,7 @@
#include <linux/pseudo_fs.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/ktime.h>
@@ -11,6 +12,8 @@
#include <linux/nsfs.h>
#include <linux/uaccess.h>
+#include "internal.h"
+
static struct vfsmount *nsfs_mnt;
static long ns_ioctl(struct file *filp, unsigned int ioctl,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1c4c51f3df60..cda1027d0819 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
&dlm_debug->d_filter_secs);
+ ocfs2_get_dlm_debug(dlm_debug);
}
static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1afe57f425a0..68ba354cf361 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1066,6 +1066,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
+ if (replayed) {
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0)
+ mlog_errno(status);
+ }
+
status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b801c6353100..6220642fe113 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -227,13 +227,17 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
{
struct ovl_fh *fh;
- int fh_type, fh_len, dwords;
- void *buf;
+ int fh_type, dwords;
int buflen = MAX_HANDLE_SZ;
uuid_t *uuid = &real->d_sb->s_uuid;
+ int err;
- buf = kmalloc(buflen, GFP_KERNEL);
- if (!buf)
+ /* Make sure the real fid stays 32bit aligned */
+ BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
+ BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
+
+ fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
+ if (!fh)
return ERR_PTR(-ENOMEM);
/*
@@ -242,27 +246,19 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
- fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
+ fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
buflen = (dwords << 2);
- fh = ERR_PTR(-EIO);
+ err = -EIO;
if (WARN_ON(fh_type < 0) ||
WARN_ON(buflen > MAX_HANDLE_SZ) ||
WARN_ON(fh_type == FILEID_INVALID))
- goto out;
+ goto out_err;
- BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
- fh_len = offsetof(struct ovl_fh, fid) + buflen;
- fh = kmalloc(fh_len, GFP_KERNEL);
- if (!fh) {
- fh = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- fh->version = OVL_FH_VERSION;
- fh->magic = OVL_FH_MAGIC;
- fh->type = fh_type;
- fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ fh->fb.version = OVL_FH_VERSION;
+ fh->fb.magic = OVL_FH_MAGIC;
+ fh->fb.type = fh_type;
+ fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
/*
* When we will want to decode an overlay dentry from this handle
* and all layers are on the same fs, if we get a disconncted real
@@ -270,14 +266,15 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* it to upperdentry or to lowerstack is by checking this flag.
*/
if (is_upper)
- fh->flags |= OVL_FH_FLAG_PATH_UPPER;
- fh->len = fh_len;
- fh->uuid = *uuid;
- memcpy(fh->fid, buf, buflen);
+ fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
+ fh->fb.len = sizeof(fh->fb) + buflen;
+ fh->fb.uuid = *uuid;
-out:
- kfree(buf);
return fh;
+
+out_err:
+ kfree(fh);
+ return ERR_PTR(err);
}
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
@@ -300,8 +297,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
/*
* Do not fail when upper doesn't support xattrs.
*/
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
- fh ? fh->len : 0, 0);
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
+ fh ? fh->fb.len : 0, 0);
kfree(fh);
return err;
@@ -317,7 +314,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+ err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
kfree(fh);
return err;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 702aa63f6774..29abdb1d3b5c 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1170,7 +1170,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (newdentry == trap)
goto out_dput;
- if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
+ if (olddentry->d_inode == newdentry->d_inode)
goto out_dput;
err = 0;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 73c9775215b3..70e55588aedc 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -211,10 +211,11 @@ static int ovl_check_encode_origin(struct dentry *dentry)
return 1;
}
-static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
+ int len;
/*
* Check if we should encode a lower or upper file handle and maybe
@@ -231,11 +232,12 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
return PTR_ERR(fh);
err = -EOVERFLOW;
- if (fh->len > buflen)
+ len = OVL_FH_LEN(fh);
+ if (len > buflen)
goto fail;
- memcpy(buf, (char *)fh, fh->len);
- err = fh->len;
+ memcpy(fid, fh, len);
+ err = len;
out:
kfree(fh);
@@ -243,31 +245,16 @@ out:
fail:
pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
- dentry, err, buflen, fh ? (int)fh->len : 0,
- fh ? fh->type : 0);
+ dentry, err, buflen, fh ? (int)fh->fb.len : 0,
+ fh ? fh->fb.type : 0);
goto out;
}
-static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
-{
- int res, len = *max_len << 2;
-
- res = ovl_d_to_fh(dentry, (char *)fid, len);
- if (res <= 0)
- return FILEID_INVALID;
-
- len = res;
-
- /* Round up to dwords */
- *max_len = (len + 3) >> 2;
- return OVL_FILEID;
-}
-
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct dentry *dentry;
- int type;
+ int bytes = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
@@ -277,10 +264,14 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
if (WARN_ON(!dentry))
return FILEID_INVALID;
- type = ovl_dentry_to_fh(dentry, fid, max_len);
-
+ bytes = ovl_dentry_to_fid(dentry, fid, bytes);
dput(dentry);
- return type;
+ if (bytes <= 0)
+ return FILEID_INVALID;
+
+ *max_len = bytes >> 2;
+
+ return OVL_FILEID_V1;
}
/*
@@ -777,24 +768,45 @@ out_err:
goto out;
}
+static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
+{
+ struct ovl_fh *fh;
+
+ /* If on-wire inner fid is aligned - nothing to do */
+ if (fh_type == OVL_FILEID_V1)
+ return (struct ovl_fh *)fid;
+
+ if (fh_type != OVL_FILEID_V0)
+ return ERR_PTR(-EINVAL);
+
+ fh = kzalloc(buflen, GFP_KERNEL);
+ if (!fh)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy unaligned inner fh into aligned buffer */
+ memcpy(&fh->fb, fid, buflen - OVL_FH_WIRE_OFFSET);
+ return fh;
+}
+
static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *dentry = NULL;
- struct ovl_fh *fh = (struct ovl_fh *) fid;
+ struct ovl_fh *fh = NULL;
int len = fh_len << 2;
unsigned int flags = 0;
int err;
- err = -EINVAL;
- if (fh_type != OVL_FILEID)
+ fh = ovl_fid_to_fh(fid, len, fh_type);
+ err = PTR_ERR(fh);
+ if (IS_ERR(fh))
goto out_err;
err = ovl_check_fh_len(fh, len);
if (err)
goto out_err;
- flags = fh->flags;
+ flags = fh->fb.flags;
dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
ovl_upper_fh_to_d(sb, fh) :
ovl_lower_fh_to_d(sb, fh);
@@ -802,12 +814,18 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
if (IS_ERR(dentry) && err != -ESTALE)
goto out_err;
+out:
+ /* We may have needed to re-align OVL_FILEID_V0 */
+ if (!IS_ERR_OR_NULL(fh) && fh != (void *)fid)
+ kfree(fh);
+
return dentry;
out_err:
pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
- len, fh_type, flags, err);
- return ERR_PTR(err);
+ fh_len, fh_type, flags, err);
+ dentry = ERR_PTR(err);
+ goto out;
}
static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index bc14781886bf..b045cf1826fc 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -200,8 +200,14 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
(is_dir || lowerstat.nlink == 1))) {
- stat->ino = lowerstat.ino;
lower_layer = ovl_layer_lower(dentry);
+ /*
+ * Cannot use origin st_dev;st_ino because
+ * origin inode content may differ from overlay
+ * inode content.
+ */
+ if (samefs || lower_layer->fsid)
+ stat->ino = lowerstat.ino;
}
/*
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c269d6033525..76ff66339173 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -84,21 +84,21 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
* Return -ENODATA for "origin unknown".
* Return <0 for an invalid file handle.
*/
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
{
- if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+ if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
return -EINVAL;
- if (fh->magic != OVL_FH_MAGIC)
+ if (fb->magic != OVL_FH_MAGIC)
return -EINVAL;
/* Treat larger version and unknown flags as "origin unknown" */
- if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
return -ENODATA;
/* Treat endianness mismatch as "origin unknown" */
- if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
- (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
return -ENODATA;
return 0;
@@ -119,15 +119,15 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_KERNEL);
+ fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, name, fh, res);
+ res = vfs_getxattr(dentry, name, fh->buf, res);
if (res < 0)
goto fail;
- err = ovl_check_fh_len(fh, res);
+ err = ovl_check_fb_len(&fh->fb, res);
if (err < 0) {
if (err == -ENODATA)
goto out;
@@ -158,12 +158,12 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
*/
- if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+ if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
return NULL;
- bytes = (fh->len - offsetof(struct ovl_fh, fid));
- real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
- bytes >> 2, (int)fh->type,
+ bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
+ real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
+ bytes >> 2, (int)fh->fb.type,
connected ? ovl_acceptable : NULL, mnt);
if (IS_ERR(real)) {
/*
@@ -173,7 +173,7 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* index entries correctly.
*/
if (real == ERR_PTR(-ESTALE) &&
- !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+ !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
real = NULL;
return real;
}
@@ -323,6 +323,14 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
int i;
for (i = 0; i < ofs->numlower; i++) {
+ /*
+ * If lower fs uuid is not unique among lower fs we cannot match
+ * fh->uuid to layer.
+ */
+ if (ofs->lower_layers[i].fsid &&
+ ofs->lower_layers[i].fs->bad_uuid)
+ continue;
+
origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
connected);
if (origin)
@@ -400,7 +408,7 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
if (IS_ERR(ofh))
return PTR_ERR(ofh);
- if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+ if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
err = -ESTALE;
kfree(ofh);
@@ -431,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
err = ovl_verify_fh(dentry, name, fh);
if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
+ err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
if (err)
goto fail;
@@ -505,20 +513,20 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto fail;
err = -EINVAL;
- if (index->d_name.len < sizeof(struct ovl_fh)*2)
+ if (index->d_name.len < sizeof(struct ovl_fb)*2)
goto fail;
err = -ENOMEM;
len = index->d_name.len / 2;
- fh = kzalloc(len, GFP_KERNEL);
+ fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
goto fail;
err = -EINVAL;
- if (hex2bin((u8 *)fh, index->d_name.name, len))
+ if (hex2bin(fh->buf, index->d_name.name, len))
goto fail;
- err = ovl_check_fh_len(fh, len);
+ err = ovl_check_fb_len(&fh->fb, len);
if (err)
goto fail;
@@ -597,11 +605,11 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
{
char *n, *s;
- n = kcalloc(fh->len, 2, GFP_KERNEL);
+ n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
if (!n)
return -ENOMEM;
- s = bin2hex(n, fh, fh->len);
+ s = bin2hex(n, fh->buf, fh->fb.len);
*name = (struct qstr) QSTR_INIT(n, s - n);
return 0;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf030f0..f283b1d69a9e 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -71,20 +71,36 @@ enum ovl_entry_flag {
#error Endianness not defined
#endif
-/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
-#define OVL_FILEID 0xfb
+/* The type used to be returned by overlay exportfs for misaligned fid */
+#define OVL_FILEID_V0 0xfb
+/* The type returned by overlay exportfs for 32bit aligned fid */
+#define OVL_FILEID_V1 0xf8
-/* On-disk and in-memeory format for redirect by file handle */
-struct ovl_fh {
+/* On-disk format for "origin" file handle */
+struct ovl_fb {
u8 version; /* 0 */
u8 magic; /* 0xfb */
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
- u8 fid[0]; /* file identifier */
+ u32 fid[0]; /* file identifier should be 32bit aligned in-memory */
} __packed;
+/* In-memory and on-wire format for overlay file handle */
+struct ovl_fh {
+ u8 padding[3]; /* make sure fb.fid is 32bit aligned */
+ union {
+ struct ovl_fb fb;
+ u8 buf[0];
+ };
+} __packed;
+
+#define OVL_FH_WIRE_OFFSET offsetof(struct ovl_fh, fb)
+#define OVL_FH_LEN(fh) (OVL_FH_WIRE_OFFSET + (fh)->fb.len)
+#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
+ offsetof(struct ovl_fb, fid))
+
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
@@ -302,7 +318,13 @@ static inline void ovl_inode_unlock(struct inode *inode)
/* namei.c */
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
+
+static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+ return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
+}
+
struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index a8279280e88d..28348c44ea5b 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -22,6 +22,8 @@ struct ovl_config {
struct ovl_sb {
struct super_block *sb;
dev_t pseudo_dev;
+ /* Unusable (conflicting) uuid */
+ bool bad_uuid;
};
struct ovl_layer {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index afbcb116a7f1..7621ff176d15 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1255,7 +1255,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
unsigned int i;
- if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+ if (!ofs->config.nfs_export && !ofs->upper_mnt)
return true;
for (i = 0; i < ofs->numlowerfs; i++) {
@@ -1263,9 +1263,13 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
* We use uuid to associate an overlay lower file handle with a
* lower layer, so we can accept lower fs with null uuid as long
* as all lower layers with null uuid are on the same fs.
+ * if we detect multiple lower fs with the same uuid, we
+ * disable lower file handle decoding on all of them.
*/
- if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+ if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) {
+ ofs->lower_fs[i].bad_uuid = true;
return false;
+ }
}
return true;
}
@@ -1277,6 +1281,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
unsigned int i;
dev_t dev;
int err;
+ bool bad_uuid = false;
/* fsid 0 is reserved for upper fs even with non upper overlay */
if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
@@ -1288,11 +1293,15 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
}
if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
- ofs->config.index = false;
- ofs->config.nfs_export = false;
- pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
- uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
- path->dentry);
+ bad_uuid = true;
+ if (ofs->config.index || ofs->config.nfs_export) {
+ ofs->config.index = false;
+ ofs->config.nfs_export = false;
+ pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+ uuid_is_null(&sb->s_uuid) ? "null" :
+ "conflicting",
+ path->dentry);
+ }
}
err = get_anon_bdev(&dev);
@@ -1303,6 +1312,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
ofs->lower_fs[ofs->numlowerfs].sb = sb;
ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
+ ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid;
ofs->numlowerfs++;
return ofs->numlowerfs;
diff --git a/fs/pipe.c b/fs/pipe.c
index 04d004ee2e8c..57502c3c0fba 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -581,7 +581,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
}
wait_event_interruptible(pipe->wait, pipe_writable(pipe));
__pipe_lock(pipe);
- was_empty = pipe_empty(head, pipe->tail);
+ was_empty = pipe_empty(pipe->head, pipe->tail);
}
out:
__pipe_unlock(pipe);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 84ad1c90d535..249672bf54fe 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -631,12 +631,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
/**
* posix_acl_update_mode - update mode in set_acl
+ * @inode: target inode
+ * @mode_p: mode (pointer) for update
+ * @acl: acl pointer
*
* Update the file mode when setting an ACL: compute the new file permission
* bits based on the ACL. In addition, if the ACL is equivalent to the new
- * file mode, set *acl to NULL to indicate that no ACL should be set.
+ * file mode, set *@acl to NULL to indicate that no ACL should be set.
*
- * As with chmod, clear the setgit bit if the caller is not in the owning group
+ * As with chmod, clear the setgid bit if the caller is not in the owning group
* or capable of CAP_FSETID (see inode_change_ok).
*
* Called from set_acl inode operations.
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 37bdbec5b402..fd931d3e77be 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -134,7 +134,7 @@ static int show_stat(struct seq_file *p, void *v)
softirq += cpustat[CPUTIME_SOFTIRQ];
steal += cpustat[CPUTIME_STEAL];
guest += cpustat[CPUTIME_GUEST];
- guest_nice += cpustat[CPUTIME_USER];
+ guest_nice += cpustat[CPUTIME_GUEST_NICE];
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
@@ -175,7 +175,7 @@ static int show_stat(struct seq_file *p, void *v)
softirq = cpustat[CPUTIME_SOFTIRQ];
steal = cpustat[CPUTIME_STEAL];
guest = cpustat[CPUTIME_GUEST];
- guest_nice = cpustat[CPUTIME_USER];
+ guest_nice = cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 8caff834f002..013486b5125e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
prz = cxt->dprzs[cxt->dump_write_cnt];
+ /*
+ * Since this is a new crash dump, we need to reset the buffer in
+ * case it still has an old dump present. Without this, the new dump
+ * will get appended, which would seriously confuse anything trying
+ * to check dump file contents. Specifically, ramoops_read_kmsg_hdr()
+ * expects to find a dump header in the beginning of buffer data, so
+ * we must to reset the buffer values, in order to ensure that the
+ * header will be written to the beginning of the buffer.
+ */
+ persistent_ram_zap(prz);
+
/* Build header and append record contents. */
hlen = ramoops_write_kmsg_hdr(prz, record);
if (!hlen)
@@ -572,6 +583,7 @@ static int ramoops_init_przs(const char *name,
prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
&cxt->ecc_info,
cxt->memtype, flags, label);
+ kfree(label);
if (IS_ERR(prz_ar[i])) {
err = PTR_ERR(prz_ar[i]);
dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
@@ -617,6 +629,7 @@ static int ramoops_init_prz(const char *name,
label = kasprintf(GFP_KERNEL, "ramoops:%s", name);
*prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
cxt->memtype, PRZ_FLAG_ZAP_OLD, label);
+ kfree(label);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 8823f65888f0..1f4d8c06f9be 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -574,7 +574,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
/* Initialize general buffer state. */
raw_spin_lock_init(&prz->buffer_lock);
prz->flags = flags;
- prz->label = label;
+ prz->label = kstrdup(label, GFP_KERNEL);
ret = persistent_ram_buffer_map(start, size, prz, memtype);
if (ret)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b0688c02dc90..b6a4f692d345 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -984,6 +984,7 @@ static int add_dquot_ref(struct super_block *sb, int type)
* later.
*/
old_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/super.c b/fs/super.c
index cfadab2cbf35..cd352530eca9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -448,10 +448,12 @@ void generic_shutdown_super(struct super_block *sb)
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
- fsnotify_sb_delete(sb);
cgroup_writeback_umount();
+ /* evict all inodes with zero refcount */
evict_inodes(sb);
+ /* only nonzero refcount inodes can have marks */
+ fsnotify_sb_delete(sb);
if (sb->s_dio_done_wq) {
destroy_workqueue(sb->s_dio_done_wq);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c284e10af491..fc93fd88ec89 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2248,24 +2248,32 @@ xfs_alloc_longest_free_extent(
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}
+/*
+ * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
+ * return the largest possible minimum length.
+ */
unsigned int
xfs_alloc_min_freelist(
struct xfs_mount *mp,
struct xfs_perag *pag)
{
+ /* AG btrees have at least 1 level. */
+ static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
+ const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
unsigned int min_free;
+ ASSERT(mp->m_ag_maxlevels > 0);
+
/* space needed by-bno freespace btree */
- min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
mp->m_ag_maxlevels);
/* space needed by-size freespace btree */
- min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
/* space needed reverse mapping used space btree */
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- min_free += min_t(unsigned int,
- pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_rmap_maxlevels);
+ min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
+ mp->m_rmap_maxlevels);
return min_free;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9ad1f991ba3..4c2e046fbfad 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4561,7 +4561,7 @@ xfs_bmapi_convert_delalloc(
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL };
- u16 flags = 0;
+ uint16_t flags = 0;
struct xfs_trans *tp;
int error;
@@ -5972,8 +5972,7 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
- if (XFS_IS_CORRUPT(mp,
- stop_fsb >= got.br_startoff + got.br_blockcount)) {
+ if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 0aa87cbde49e..dd6fcaaea318 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -724,3 +724,24 @@ xfs_dir2_namecheck(
/* There shouldn't be any slashes or nulls here */
return !memchr(name, '/', length) && !memchr(name, 0, length);
}
+
+xfs_dahash_t
+xfs_dir2_hashname(
+ struct xfs_mount *mp,
+ struct xfs_name *name)
+{
+ if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
+ return xfs_ascii_ci_hashname(name);
+ return xfs_da_hashname(name->name, name->len);
+}
+
+enum xfs_dacmp
+xfs_dir2_compname(
+ struct xfs_da_args *args,
+ const unsigned char *name,
+ int len)
+{
+ if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
+ return xfs_ascii_ci_compname(args, name, len);
+ return xfs_da_compname(args, name, len);
+}
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index c031c53d0f0d..01ee0b926572 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -175,6 +175,12 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
+int xfs_dir2_sf_entsize(struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr, int len);
+void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
+void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep, uint8_t ftype);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
@@ -194,25 +200,8 @@ xfs_dir2_data_entsize(
return round_up(len, XFS_DIR2_DATA_ALIGN);
}
-static inline xfs_dahash_t
-xfs_dir2_hashname(
- struct xfs_mount *mp,
- struct xfs_name *name)
-{
- if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
- return xfs_ascii_ci_hashname(name);
- return xfs_da_hashname(name->name, name->len);
-}
-
-static inline enum xfs_dacmp
-xfs_dir2_compname(
- struct xfs_da_args *args,
- const unsigned char *name,
- int len)
-{
- if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
- return xfs_ascii_ci_compname(args, name, len);
- return xfs_da_compname(args, name, len);
-}
+xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
+enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
+ const unsigned char *name, int len);
#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 8b94d33d232f..7b7f6fb2ea3b 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -37,7 +37,7 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
-static int
+int
xfs_dir2_sf_entsize(
struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr,
@@ -84,7 +84,7 @@ xfs_dir2_sf_get_ino(
return get_unaligned_be64(from) & XFS_MAXINUMBER;
}
-static void
+void
xfs_dir2_sf_put_ino(
struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr,
@@ -145,7 +145,7 @@ xfs_dir2_sf_get_ftype(
return XFS_DIR3_FT_UNKNOWN;
}
-static void
+void
xfs_dir2_sf_put_ftype(
struct xfs_mount *mp,
struct xfs_dir2_sf_entry *sfep,
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 988cde7744e6..5b759af4d165 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2909,3 +2909,67 @@ xfs_ialloc_setup_geometry(
else
igeo->ialloc_align = 0;
}
+
+/* Compute the location of the root directory inode that is laid out by mkfs. */
+xfs_ino_t
+xfs_ialloc_calc_rootino(
+ struct xfs_mount *mp,
+ int sunit)
+{
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_agblock_t first_bno;
+
+ /*
+ * Pre-calculate the geometry of AG 0. We know what it looks like
+ * because libxfs knows how to create allocation groups now.
+ *
+ * first_bno is the first block in which mkfs could possibly have
+ * allocated the root directory inode, once we factor in the metadata
+ * that mkfs formats before it. Namely, the four AG headers...
+ */
+ first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+ /* ...the two free space btree roots... */
+ first_bno += 2;
+
+ /* ...the inode btree root... */
+ first_bno += 1;
+
+ /* ...the initial AGFL... */
+ first_bno += xfs_alloc_min_freelist(mp, NULL);
+
+ /* ...the free inode btree root... */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ first_bno++;
+
+ /* ...the reverse mapping btree root... */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ first_bno++;
+
+ /* ...the reference count btree... */
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ first_bno++;
+
+ /*
+ * ...and the log, if it is allocated in the first allocation group.
+ *
+ * This can happen with filesystems that only have a single
+ * allocation group, or very odd geometries created by old mkfs
+ * versions on very small filesystems.
+ */
+ if (mp->m_sb.sb_logstart &&
+ XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
+ first_bno += mp->m_sb.sb_logblocks;
+
+ /*
+ * Now round first_bno up to whatever allocation alignment is given
+ * by the filesystem or was passed in.
+ */
+ if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+ first_bno = roundup(first_bno, sunit);
+ else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+ mp->m_sb.sb_inoalignmt > 1)
+ first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
+
+ return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 323592d563d5..72b3468b97b1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
+xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index c55cd9a3dec9..7a9c04920505 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -197,6 +197,24 @@ xfs_calc_inode_chunk_res(
}
/*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating a realtime extent. We have to be able to log as many rtbitmap
+ * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
+ * as well as the realtime summary block.
+ */
+static unsigned int
+xfs_rtalloc_log_count(
+ struct xfs_mount *mp,
+ unsigned int num_ops)
+{
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+ unsigned int rtbmp_bytes;
+
+ rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
+ return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+}
+
+/*
* Various log reservation values.
*
* These are based on the size of the file system block because that is what
@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
/*
* In a write transaction we can allocate a maximum of 2
- * extents. This gives:
+ * extents. This gives (t1):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
+ * Or, if we're writing to a realtime file (t2):
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 1 block
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
@@ -234,40 +260,72 @@ STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
- XFS_FSB_TO_B(mp, 1)) +
+ blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+ } else {
+ t2 = 0;
+ }
+
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
- * In truncating a file we free up to two extents at once. We can modify:
+ * In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+ t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+ } else {
+ t3 = 0;
+ }
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2efd78a9719e..e62fb5216341 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -992,6 +992,7 @@ xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
+ struct xfs_mount *mp = ip->i_mount;
int error;
/*
@@ -1005,6 +1006,17 @@ xfs_prepare_shift(
}
/*
+ * Shift operations must stabilize the start block offset boundary along
+ * with the full range of the operation. If we don't, a COW writeback
+ * completion could race with an insert, front merge with the start
+ * extent (after split) during the shift and corrupt the file. Start
+ * with the block just prior to the start to stabilize the boundary.
+ */
+ offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
+ if (offset)
+ offset -= (1 << mp->m_sb.sb_blocklog);
+
+ /*
* Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from offset to EOF.
*/
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 3458a1264a3f..3984779e5911 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -956,7 +956,7 @@ xfs_buf_item_relse(
struct xfs_buf_log_item *bip = bp->b_log_item;
trace_xfs_buf_item_relse(bp, _RET_IP_);
- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
bp->b_log_item = NULL;
if (list_empty(&bp->b_li_list))
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fca65109cf24..56efe140c923 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -31,7 +31,7 @@
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
-
+#include "xfs_trace.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -360,66 +360,119 @@ release_buf:
}
/*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before. Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+ struct xfs_mount *mp,
+ int new_dalign,
+ bool *update_sb)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ xfs_ino_t calc_ino;
+
+ calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+ trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+ if (sbp->sb_rootino == calc_ino) {
+ *update_sb = true;
+ return 0;
+ }
+
+ xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+ /*
+ * XXX: Next time we add a new incompat feature, this should start
+ * returning -EINVAL to fail the mount. Until then, spit out a warning
+ * that we're ignoring the administrator's instructions.
+ */
+ xfs_warn(mp, "Skipping superblock stripe alignment update.");
+ *update_sb = false;
+ return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects. This step
+ * /must/ be done before computing the inode geometry.
*/
STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+ struct xfs_mount *mp)
{
- xfs_sb_t *sbp = &(mp->m_sb);
+ if (mp->m_dalign == 0)
+ return 0;
- if (mp->m_dalign) {
+ /*
+ * If stripe unit and stripe width are not multiples
+ * of the fs blocksize turn off alignment.
+ */
+ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+ (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ xfs_warn(mp,
+ "alignment check failed: sunit/swidth vs. blocksize(%d)",
+ mp->m_sb.sb_blocksize);
+ return -EINVAL;
+ } else {
/*
- * If stripe unit and stripe width are not multiples
- * of the fs blocksize turn off alignment.
+ * Convert the stripe unit and width to FSBs.
*/
- if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
- (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. blocksize(%d)",
- sbp->sb_blocksize);
+ "alignment check failed: sunit/swidth vs. agsize(%d)",
+ mp->m_sb.sb_agblocks);
return -EINVAL;
- } else {
- /*
- * Convert the stripe unit and width to FSBs.
- */
- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
- if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
- xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. agsize(%d)",
- sbp->sb_agblocks);
- return -EINVAL;
- } else if (mp->m_dalign) {
- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
- } else {
- xfs_warn(mp,
- "alignment check failed: sunit(%d) less than bsize(%d)",
- mp->m_dalign, sbp->sb_blocksize);
- return -EINVAL;
- }
- }
-
- /*
- * Update superblock with new values
- * and log changes
- */
- if (xfs_sb_version_hasdalign(sbp)) {
- if (sbp->sb_unit != mp->m_dalign) {
- sbp->sb_unit = mp->m_dalign;
- mp->m_update_sb = true;
- }
- if (sbp->sb_width != mp->m_swidth) {
- sbp->sb_width = mp->m_swidth;
- mp->m_update_sb = true;
- }
+ } else if (mp->m_dalign) {
+ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
- "cannot change alignment: superblock does not support data alignment");
+ "alignment check failed: sunit(%d) less than bsize(%d)",
+ mp->m_dalign, mp->m_sb.sb_blocksize);
return -EINVAL;
}
+ }
+
+ if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+ xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+ struct xfs_mount *mp)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+
+ if (mp->m_dalign) {
+ bool update_sb;
+ int error;
+
+ if (sbp->sb_unit == mp->m_dalign &&
+ sbp->sb_width == mp->m_swidth)
+ return 0;
+
+ error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+ if (error || !update_sb)
+ return error;
+
+ sbp->sb_unit = mp->m_dalign;
+ sbp->sb_width = mp->m_swidth;
+ mp->m_update_sb = true;
} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
xfs_sb_version_hasdalign(&mp->m_sb)) {
- mp->m_dalign = sbp->sb_unit;
- mp->m_swidth = sbp->sb_width;
+ mp->m_dalign = sbp->sb_unit;
+ mp->m_swidth = sbp->sb_width;
}
return 0;
@@ -648,12 +701,12 @@ xfs_mountfs(
}
/*
- * Check if sb_agblocks is aligned at stripe boundary
- * If sb_agblocks is NOT aligned turn off m_dalign since
- * allocator alignment is within an ag, therefore ag has
- * to be aligned at stripe boundary.
+ * If we were given new sunit/swidth options, do some basic validation
+ * checks and convert the incore dalign and swidth values to the
+ * same units (FSB) that everything else uses. This /must/ happen
+ * before computing the inode geometry.
*/
- error = xfs_update_alignment(mp);
+ error = xfs_validate_new_dalign(mp);
if (error)
goto out;
@@ -664,6 +717,17 @@ xfs_mountfs(
xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
+ /*
+ * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
+ * is NOT aligned turn off m_dalign since allocator alignment is within
+ * an ag, therefore ag has to be aligned at stripe boundary. Note that
+ * we must compute the free space and rmap btree geometry before doing
+ * this.
+ */
+ error = xfs_update_alignment(mp);
+ if (error)
+ goto out;
+
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c13bb3655e48..a86be7f807ee 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3573,6 +3573,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large);
DEFINE_KMEM_EVENT(kmem_realloc);
DEFINE_KMEM_EVENT(kmem_zone_alloc);
+TRACE_EVENT(xfs_check_new_dalign,
+ TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
+ TP_ARGS(mp, new_dalign, calc_rootino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, new_dalign)
+ __field(xfs_ino_t, sb_rootino)
+ __field(xfs_ino_t, calc_rootino)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->new_dalign = new_dalign;
+ __entry->sb_rootino = mp->m_sb.sb_rootino;
+ __entry->calc_rootino = calc_rootino;
+ ),
+ TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->new_dalign, __entry->sb_rootino,
+ __entry->calc_rootino)
+)
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH