From df3cb754d13d2cd5490db9b8d536311f8413a92e Mon Sep 17 00:00:00 2001 From: Jerry Lee 李修賢 Date: Mon, 18 Jul 2022 10:25:19 +0000 Subject: ext4: continue to expand file system when the target size doesn't reach When expanding a file system from (16TiB-2MiB) to 18TiB, the operation exits early which leads to result inconsistency between resize2fs and Ext4 kernel driver. === before === ○ → resize2fs /dev/mapper/thin resize2fs 1.45.5 (07-Jan-2020) Filesystem at /dev/mapper/thin is mounted on /mnt/test; on-line resizing required old_desc_blocks = 2048, new_desc_blocks = 2304 The filesystem on /dev/mapper/thin is now 4831837696 (4k) blocks long. [ 865.186308] EXT4-fs (dm-5): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none. [ 912.091502] dm-4: detected capacity change from 34359738368 to 38654705664 [ 970.030550] dm-5: detected capacity change from 34359734272 to 38654701568 [ 1000.012751] EXT4-fs (dm-5): resizing filesystem from 4294966784 to 4831837696 blocks [ 1000.012878] EXT4-fs (dm-5): resized filesystem to 4294967296 === after === [ 129.104898] EXT4-fs (dm-5): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none. [ 143.773630] dm-4: detected capacity change from 34359738368 to 38654705664 [ 198.203246] dm-5: detected capacity change from 34359734272 to 38654701568 [ 207.918603] EXT4-fs (dm-5): resizing filesystem from 4294966784 to 4831837696 blocks [ 207.918754] EXT4-fs (dm-5): resizing filesystem from 4294967296 to 4831837696 blocks [ 207.918758] EXT4-fs (dm-5): Converting file system to meta_bg [ 207.918790] EXT4-fs (dm-5): resizing filesystem from 4294967296 to 4831837696 blocks [ 221.454050] EXT4-fs (dm-5): resized to 4658298880 blocks [ 227.634613] EXT4-fs (dm-5): resized filesystem to 4831837696 Signed-off-by: Jerry Lee Link: https://lore.kernel.org/r/PU1PR04MB22635E739BD21150DC182AC6A18C9@PU1PR04MB2263.apcprd04.prod.outlook.com Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index fea2a68d067b..6dfe9ccae0c5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2122,7 +2122,7 @@ retry: goto out; } - if (ext4_blocks_count(es) == n_blocks_count) + if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) goto out; err = ext4_alloc_flex_bg_array(sb, n_group + 1); -- cgit v1.2.3 From d412df530f77d0f61c41b83f925997452fc3944c Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Fri, 22 Jul 2022 12:39:10 -0400 Subject: ext4: minor defrag code improvements Modify the error returns for two file types that can't be defragged to more clearly communicate those restrictions to a caller. When the defrag code is applied to swap files, return -ETXTBSY, and when applied to quota files, return -EOPNOTSUPP. Move an extent tree search whose results are only occasionally required to the site always requiring them for improved efficiency. Address a few typos. Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20220722163910.268564-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/move_extent.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 701f1d6a217f..2b83621d16e2 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -472,19 +472,17 @@ mext_check_arguments(struct inode *orig_inode, if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) return -EPERM; - /* Ext4 move extent does not support swapfile */ + /* Ext4 move extent does not support swap files */ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { - ext4_debug("ext4 move extent: The argument files should " - "not be swapfile [ino:orig %lu, donor %lu]\n", + ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); - return -EBUSY; + return -ETXTBSY; } if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) { - ext4_debug("ext4 move extent: The argument files should " - "not be quota files [ino:orig %lu, donor %lu]\n", + ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); - return -EBUSY; + return -EOPNOTSUPP; } /* Ext4 move extent supports only extent based file */ @@ -631,11 +629,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, if (ret) goto out; ex = path[path->p_depth].p_ext; - next_blk = ext4_ext_next_allocated_block(path); cur_blk = le32_to_cpu(ex->ee_block); cur_len = ext4_ext_get_actual_len(ex); /* Check hole before the start pos */ if (cur_blk + cur_len - 1 < o_start) { + next_blk = ext4_ext_next_allocated_block(path); if (next_blk == EXT_MAX_BLOCKS) { ret = -ENODATA; goto out; @@ -663,7 +661,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, donor_page_index = d_start >> (PAGE_SHIFT - donor_inode->i_blkbits); offset_in_page = o_start % blocks_per_page; - if (cur_len > blocks_per_page- offset_in_page) + if (cur_len > blocks_per_page - offset_in_page) cur_len = blocks_per_page - offset_in_page; /* * Up semaphore to avoid following problems: -- cgit v1.2.3 From 4bb26f2885ac6930984ee451b952c5a6042f2c0e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Jul 2022 17:57:53 +0200 Subject: ext4: avoid crash when inline data creation follows DIO write When inode is created and written to using direct IO, there is nothing to clear the EXT4_STATE_MAY_INLINE_DATA flag. Thus when inode gets truncated later to say 1 byte and written using normal write, we will try to store the data as inline data. This confuses the code later because the inode now has both normal block and inline data allocated and the confusion manifests for example as: kernel BUG at fs/ext4/inode.c:2721! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 359 Comm: repro Not tainted 5.19.0-rc8-00001-g31ba1e3b8305-dirty #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 RIP: 0010:ext4_writepages+0x363d/0x3660 RSP: 0018:ffffc90000ccf260 EFLAGS: 00010293 RAX: ffffffff81e1abcd RBX: 0000008000000000 RCX: ffff88810842a180 RDX: 0000000000000000 RSI: 0000008000000000 RDI: 0000000000000000 RBP: ffffc90000ccf650 R08: ffffffff81e17d58 R09: ffffed10222c680b R10: dfffe910222c680c R11: 1ffff110222c680a R12: ffff888111634128 R13: ffffc90000ccf880 R14: 0000008410000000 R15: 0000000000000001 FS: 00007f72635d2640(0000) GS:ffff88811b000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565243379180 CR3: 000000010aa74000 CR4: 0000000000150eb0 Call Trace: do_writepages+0x397/0x640 filemap_fdatawrite_wbc+0x151/0x1b0 file_write_and_wait_range+0x1c9/0x2b0 ext4_sync_file+0x19e/0xa00 vfs_fsync_range+0x17b/0x190 ext4_buffered_write_iter+0x488/0x530 ext4_file_write_iter+0x449/0x1b90 vfs_write+0xbcd/0xf40 ksys_write+0x198/0x2c0 __x64_sys_write+0x7b/0x90 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix the problem by clearing EXT4_STATE_MAY_INLINE_DATA when we are doing direct IO write to a file. Cc: stable@kernel.org Reported-by: Tadeusz Struk Reported-by: syzbot+bd13648a53ed6933ca49@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=a1e89d09bbbcbd5c4cb45db230ee28c822953984 Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Tested-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220727155753.13969-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/file.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 109d07629f81..847a2f806b8f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -528,6 +528,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EAGAIN; goto out; } + /* + * Make sure inline data cannot be created anymore since we are going + * to allocate blocks for DIO. We know the inode does not have any + * inline data now because ext4_dio_supported() checked for that. + */ + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); offset = iocb->ki_pos; count = ret; -- cgit v1.2.3 From 2d544ec923dbe5fbed64a7f43dccf527218380bc Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Thu, 28 Jul 2022 11:02:49 +0800 Subject: ext4: remove deprecated noacl/nouser_xattr options These two options should have been removed since 3.5, but none notices it. Recently, I and Darrick found this. Also, have some discussion for this[1][2][3]. So now, let's remove them. Link: https://lore.kernel.org/linux-ext4/6258F7BB.8010104@fujitsu.com/T/#u[1] Link: https://lore.kernel.org/linux-ext4/20220602110421.ymoug3rwfspmryqg@fedora/T/#t[2] Link: https://lore.kernel.org/linux-ext4/08e2ca4c8f6344bdcd76d75b821116c6147fd57a.camel@kernel.org/T/#t[3] Signed-off-by: Yang Xu Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1658977369-2478-1-git-send-email-xuyang2018.jy@fujitsu.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a66abcca1a8..23329d7c9d55 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1576,7 +1576,7 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_nouid32, Opt_debug, Opt_removed, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_user_xattr, Opt_acl, Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, @@ -1662,9 +1662,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("oldalloc", Opt_removed), fsparam_flag ("orlov", Opt_removed), fsparam_flag ("user_xattr", Opt_user_xattr), - fsparam_flag ("nouser_xattr", Opt_nouser_xattr), fsparam_flag ("acl", Opt_acl), - fsparam_flag ("noacl", Opt_noacl), fsparam_flag ("norecovery", Opt_noload), fsparam_flag ("noload", Opt_noload), fsparam_flag ("bh", Opt_removed), @@ -1814,13 +1812,10 @@ static const struct mount_opts { {Opt_journal_ioprio, 0, MOPT_NO_EXT2}, {Opt_data, 0, MOPT_NO_EXT2}, {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, - {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, #ifdef CONFIG_EXT4_FS_POSIX_ACL {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, - {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, #else {Opt_acl, 0, MOPT_NOSUPPORT}, - {Opt_noacl, 0, MOPT_NOSUPPORT}, #endif {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, @@ -2120,10 +2115,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) else return note_qf_name(fc, GRPQUOTA, param); #endif - case Opt_noacl: - case Opt_nouser_xattr: - ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); - break; case Opt_sb: if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { ext4_msg(NULL, KERN_WARNING, -- cgit v1.2.3 From 426d15ad11419066f7042ffa8fbf1b5c21a1ecbe Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 31 Jul 2022 20:24:53 -0700 Subject: ext4: don't run ext4lazyinit for read-only filesystems On a read-only filesystem, we won't invoke the block allocator, so we don't need to prefetch the block bitmaps. This avoids starting and running the ext4lazyinit thread at all on a system with no read-write ext4 filesystems (for instance, a container VM with read-only filesystems underneath an overlayfs). Fixes: 21175ca434c5 ("ext4: make prefetch_block_bitmaps default") Signed-off-by: Josh Triplett Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/48b41da1498fcac3287e2e06b660680646c1c050.1659323972.git.josh@joshtriplett.org Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 23329d7c9d55..afbbf07f58a7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3973,9 +3973,9 @@ int ext4_register_li_request(struct super_block *sb, goto out; } - if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && - (first_not_zeroed == ngroups || sb_rdonly(sb) || - !test_opt(sb, INIT_INODE_TABLE))) + if (sb_rdonly(sb) || + (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && + (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE)))) goto out; elr = ext4_li_request_new(sb, first_not_zeroed); -- cgit v1.2.3 From f9c1f248607d5546075d3f731e7607d5571f2b60 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 5 Aug 2022 20:39:47 +0800 Subject: ext4: fix null-ptr-deref in ext4_write_info I caught a null-ptr-deref bug as follows: ================================================================== KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 1 PID: 1589 Comm: umount Not tainted 5.10.0-02219-dirty #339 RIP: 0010:ext4_write_info+0x53/0x1b0 [...] Call Trace: dquot_writeback_dquots+0x341/0x9a0 ext4_sync_fs+0x19e/0x800 __sync_filesystem+0x83/0x100 sync_filesystem+0x89/0xf0 generic_shutdown_super+0x79/0x3e0 kill_block_super+0xa1/0x110 deactivate_locked_super+0xac/0x130 deactivate_super+0xb6/0xd0 cleanup_mnt+0x289/0x400 __cleanup_mnt+0x16/0x20 task_work_run+0x11c/0x1c0 exit_to_user_mode_prepare+0x203/0x210 syscall_exit_to_user_mode+0x5b/0x3a0 do_syscall_64+0x59/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================== Above issue may happen as follows: ------------------------------------- exit_to_user_mode_prepare task_work_run __cleanup_mnt cleanup_mnt deactivate_super deactivate_locked_super kill_block_super generic_shutdown_super shrink_dcache_for_umount dentry = sb->s_root sb->s_root = NULL <--- Here set NULL sync_filesystem __sync_filesystem sb->s_op->sync_fs > ext4_sync_fs dquot_writeback_dquots sb->dq_op->write_info > ext4_write_info ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2) d_inode(sb->s_root) s_root->d_inode <--- Null pointer dereference To solve this problem, we use ext4_journal_start_sb directly to avoid s_root being used. Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220805123947.565152-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index afbbf07f58a7..aba4b1fca9e4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6644,7 +6644,7 @@ static int ext4_write_info(struct super_block *sb, int type) handle_t *handle; /* Data block + inode block */ - handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); + handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); -- cgit v1.2.3 From 3b575495ab8dbb4dbe85b4ac7f991693c3668ff5 Mon Sep 17 00:00:00 2001 From: Lalith Rajendran Date: Thu, 18 Aug 2022 21:40:49 +0000 Subject: ext4: make ext4_lazyinit_thread freezable ext4_lazyinit_thread is not set freezable. Hence when the thread calls try_to_freeze it doesn't freeze during suspend and continues to send requests to the storage during suspend, resulting in suspend failures. Cc: stable@kernel.org Signed-off-by: Lalith Rajendran Link: https://lore.kernel.org/r/20220818214049.1519544-1-lalithkraj@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index aba4b1fca9e4..6bb0f493221c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3758,6 +3758,7 @@ static int ext4_lazyinit_thread(void *arg) unsigned long next_wakeup, cur; BUG_ON(NULL == eli); + set_freezable(); cont_thread: while (true) { -- cgit v1.2.3 From 61a1d87a324ad5e3ed27c6699dfc93218fcf3201 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Aug 2022 13:48:32 +0200 Subject: ext4: fix check for block being out of directory size The check in __ext4_read_dirblock() for block being outside of directory size was wrong because it compared block number against directory size in bytes. Fix it. Fixes: 65f8ea4cd57d ("ext4: check if directory block is within i_size") CVE: CVE-2022-1184 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/20220822114832.1482-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 3a31b662f661..bc2e0612ec32 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -126,7 +126,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, struct ext4_dir_entry *dirent; int is_dx_block = 0; - if (block >= inode->i_size) { + if (block >= inode->i_size >> inode->i_blkbits) { ext4_error_inode(inode, func, line, block, "Attempting to read directory block (%u) that is past i_size (%llu)", block, inode->i_size); -- cgit v1.2.3 From 50f094a5580e6297bf10a807d16f0ee23fa576cf Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 24 Aug 2022 18:03:47 +0200 Subject: ext4: don't increase iversion counter for ea_inodes ea_inodes are using i_version for storing part of the reference count so we really need to leave it alone. The problem can be reproduced by xfstest ext4/026 when iversion is enabled. Fix it by not calling inode_inc_iversion() for EXT4_EA_INODE_FL inodes in ext4_mark_iloc_dirty(). Cc: stable@kernel.org Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220824160349.39664-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 601214453c3a..2a220be34caa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5731,7 +5731,12 @@ int ext4_mark_iloc_dirty(handle_t *handle, } ext4_fc_track_inode(handle, inode); - if (IS_I_VERSION(inode)) + /* + * ea_inodes are using i_version for storing reference count, don't + * mess with it + */ + if (IS_I_VERSION(inode) && + !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) inode_inc_iversion(inode); /* the do_update_inode consumes one bh->b_count */ -- cgit v1.2.3 From 1ff20307393e17dc57fde62226df625a3a3c36e9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Aug 2022 18:03:49 +0200 Subject: ext4: unconditionally enable the i_version counter The original i_version implementation was pretty expensive, requiring a log flush on every change. Because of this, it was gated behind a mount option (implemented via the MS_I_VERSION mountoption flag). Commit ae5e165d855d (fs: new API for handling inode->i_version) made the i_version flag much less expensive, so there is no longer a performance penalty from enabling it. xfs and btrfs already enable it unconditionally when the on-disk format can support it. Have ext4 ignore the SB_I_VERSION flag, and just enable it unconditionally. While we're in here, mark the i_version mount option Opt_removed. [ Removed leftover bits of i_version from ext4_apply_options() since it now can't ever be set in ctx->mask_s_flags -- lczerner ] Cc: stable@kernel.org Cc: Dave Chinner Cc: Benjamin Coddington Cc: Christoph Hellwig Cc: Darrick J. Wong Signed-off-by: Jeff Layton Signed-off-by: Lukas Czerner Reviewed-by: Christian Brauner (Microsoft) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220824160349.39664-3-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 5 ++--- fs/ext4/super.c | 22 +++++----------------- 2 files changed, 7 insertions(+), 20 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a220be34caa..c77d40f05763 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5425,7 +5425,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; } - if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) + if (attr->ia_size != inode->i_size) inode_inc_iversion(inode); if (shrink) { @@ -5735,8 +5735,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, * ea_inodes are using i_version for storing reference count, don't * mess with it */ - if (IS_I_VERSION(inode) && - !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) inode_inc_iversion(inode); /* the do_update_inode consumes one bh->b_count */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6bb0f493221c..2a7af6cbe5e0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1585,7 +1585,7 @@ enum { Opt_inlinecrypt, Opt_usrjquota, Opt_grpjquota, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, + Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize, @@ -1692,7 +1692,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("barrier", Opt_barrier), fsparam_u32 ("barrier", Opt_barrier), fsparam_flag ("nobarrier", Opt_nobarrier), - fsparam_flag ("i_version", Opt_i_version), + fsparam_flag ("i_version", Opt_removed), fsparam_flag ("dax", Opt_dax), fsparam_enum ("dax", Opt_dax_type, ext4_param_dax), fsparam_u32 ("stripe", Opt_stripe), @@ -2131,11 +2131,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_abort: ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); return 0; - case Opt_i_version: - ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); - ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n"); - ctx_set_flags(ctx, SB_I_VERSION); - return 0; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT ctx_set_flags(ctx, SB_INLINECRYPT); @@ -2805,14 +2800,6 @@ static void ext4_apply_options(struct fs_context *fc, struct super_block *sb) sb->s_flags &= ~ctx->mask_s_flags; sb->s_flags |= ctx->vals_s_flags; - /* - * i_version differs from common mount option iversion so we have - * to let vfs know that it was set, otherwise it would get cleared - * on remount - */ - if (ctx->mask_s_flags & SB_I_VERSION) - fc->sb_flags |= SB_I_VERSION; - #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) APPLY(s_commit_interval); APPLY(s_stripe); @@ -2961,8 +2948,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); - if (sb->s_flags & SB_I_VERSION) - SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); if (nodefs || EXT4_MOUNT_DATA_FLAGS & @@ -4632,6 +4617,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); + /* i_version is always enabled now */ + sb->s_flags |= SB_I_VERSION; + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (ext4_has_compat_features(sb) || ext4_has_ro_compat_features(sb) || -- cgit v1.2.3 From 0b73284c564d3ae4feef4bc920292f004acf4980 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 31 Aug 2022 15:46:29 +0800 Subject: ext4: ext4_read_bh_lock() should submit IO if the buffer isn't uptodate Recently we notice that ext4 filesystem would occasionally fail to read metadata from disk and report error message, but the disk and block layer looks fine. After analyse, we lockon commit 88dbcbb3a484 ("blkdev: avoid migration stalls for blkdev pages"). It provide a migration method for the bdev, we could move page that has buffers without extra users now, but it lock the buffers on the page, which breaks the fragile metadata read operation on ext4 filesystem, ext4_read_bh_lock() was copied from ll_rw_block(), it depends on the assumption of that locked buffer means it is under IO. So it just trylock the buffer and skip submit IO if it lock failed, after wait_on_buffer() we conclude IO error because the buffer is not uptodate. This issue could be easily reproduced by add some delay just after buffer_migrate_lock_buffers() in __buffer_migrate_folio() and do fsstress on ext4 filesystem. EXT4-fs error (device pmem1): __ext4_find_entry:1658: inode #73193: comm fsstress: reading directory lblock 0 EXT4-fs error (device pmem1): __ext4_find_entry:1658: inode #75334: comm fsstress: reading directory lblock 0 Fix it by removing the trylock logic in ext4_read_bh_lock(), just lock the buffer and submit IO if it's not uptodate, and also leave over readahead helper. Cc: stable@kernel.org Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220831074629.3755110-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2a7af6cbe5e0..4eb18c4c52d7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -205,19 +205,12 @@ int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait) { - if (trylock_buffer(bh)) { - if (wait) - return ext4_read_bh(bh, op_flags, NULL); + lock_buffer(bh); + if (!wait) { ext4_read_bh_nowait(bh, op_flags, NULL); return 0; } - if (wait) { - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return 0; - return -EIO; - } - return 0; + return ext4_read_bh(bh, op_flags, NULL); } /* @@ -264,7 +257,8 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) struct buffer_head *bh = sb_getblk_gfp(sb, block, 0); if (likely(bh)) { - ext4_read_bh_lock(bh, REQ_RAHEAD, false); + if (trylock_buffer(bh)) + ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL); brelse(bh); } } -- cgit v1.2.3 From d1052d236eddf6aa851434db1897b942e8db9921 Mon Sep 17 00:00:00 2001 From: Jinke Han Date: Sat, 3 Sep 2022 09:24:29 +0800 Subject: ext4: place buffer head allocation before handle start In our product environment, we encounter some jbd hung waiting handles to stop while several writters were doing memory reclaim for buffer head allocation in delay alloc write path. Ext4 do buffer head allocation with holding transaction handle which may be blocked too long if the reclaim works not so smooth. According to our bcc trace, the reclaim time in buffer head allocation can reach 258s and the jbd transaction commit also take almost the same time meanwhile. Except for these extreme cases, we often see several seconds delays for cgroup memory reclaim on our servers. This is more likely to happen considering docker environment. One thing to note, the allocation of buffer heads is as often as page allocation or more often when blocksize less than page size. Just like page cache allocation, we should also place the buffer head allocation before startting the handle. Cc: stable@kernel.org Signed-off-by: Jinke Han Link: https://lore.kernel.org/r/20220903012429.22555-1-hanjinke.666@bytedance.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c77d40f05763..a56c57375456 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1188,6 +1188,13 @@ retry_grab: page = grab_cache_page_write_begin(mapping, index); if (!page) return -ENOMEM; + /* + * The same as page allocation, we prealloc buffer heads before + * starting the handle. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, inode->i_sb->s_blocksize, 0); + unlock_page(page); retry_journal: -- cgit v1.2.3 From a642c2c0827f5604a93f9fa1e5701eecdce4ae22 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 8 Sep 2022 13:24:42 -0400 Subject: ext4: fix i_version handling in ext4 ext4 currently updates the i_version counter when the atime is updated during a read. This is less than ideal as it can cause unnecessary cache invalidations with NFSv4 and unnecessary remeasurements for IMA. The increment in ext4_mark_iloc_dirty is also problematic since it can corrupt the i_version counter for ea_inodes. We aren't bumping the file times in ext4_mark_iloc_dirty, so changing the i_version there seems wrong, and is the cause of both problems. Remove that callsite and add increments to the setattr, setxattr and ioctl codepaths, at the same times that we update the ctime. The i_version bump that already happens during timestamp updates should take care of the rest. In ext4_move_extents, increment the i_version on both inodes, and also add in missing ctime updates. [ Some minor updates since we've already enabled the i_version counter unconditionally already via another patch series. -- TYT ] Cc: stable@kernel.org Cc: Lukas Czerner Reviewed-by: Jan Kara Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20220908172448.208585-3-jlayton@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 14 +++++--------- fs/ext4/ioctl.c | 4 ++++ fs/ext4/xattr.c | 1 + 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a56c57375456..6da73be32bff 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5349,6 +5349,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, int error, rc = 0; int orphan = 0; const unsigned int ia_valid = attr->ia_valid; + bool inc_ivers = true; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; @@ -5432,8 +5433,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; } - if (attr->ia_size != inode->i_size) - inode_inc_iversion(inode); + if (attr->ia_size == inode->i_size) + inc_ivers = false; if (shrink) { if (ext4_should_order_data(inode)) { @@ -5535,6 +5536,8 @@ out_mmap_sem: } if (!error) { + if (inc_ivers) + inode_inc_iversion(inode); setattr_copy(mnt_userns, inode, attr); mark_inode_dirty(inode); } @@ -5738,13 +5741,6 @@ int ext4_mark_iloc_dirty(handle_t *handle, } ext4_fc_track_inode(handle, inode); - /* - * ea_inodes are using i_version for storing reference count, don't - * mess with it - */ - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - inode_inc_iversion(inode); - /* the do_update_inode consumes one bh->b_count */ get_bh(iloc->bh); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3cf3ec4b1c21..ad3a294a88eb 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -452,6 +452,7 @@ static long swap_inode_boot_loader(struct super_block *sb, swap_inode_data(inode, inode_bl); inode->i_ctime = inode_bl->i_ctime = current_time(inode); + inode_inc_iversion(inode); inode->i_generation = prandom_u32(); inode_bl->i_generation = prandom_u32(); @@ -665,6 +666,7 @@ static int ext4_ioctl_setflags(struct inode *inode, ext4_set_inode_flags(inode, false); inode->i_ctime = current_time(inode); + inode_inc_iversion(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: @@ -775,6 +777,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) EXT4_I(inode)->i_projid = kprojid; inode->i_ctime = current_time(inode); + inode_inc_iversion(inode); out_dirty: rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) @@ -1257,6 +1260,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode->i_ctime = current_time(inode); + inode_inc_iversion(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 533216e80fa2..36d6ba7190b6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2412,6 +2412,7 @@ retry_inode: if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = current_time(inode); + inode_inc_iversion(inode); if (!value) no_expand = 0; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); -- cgit v1.2.3 From ebd5d23e88b7fc84cbbb7a002aa35bfd5bfadb10 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 9 Sep 2022 14:53:07 +0800 Subject: ext4: remove ext4_inline_data_fiemap() declaration ext4_inline_data_fiemap() has been removed since commit d3b6f23f7167 ("ext4: move ext4_fiemap to use iomap framework"), so remove it. Signed-off-by: Gaosheng Cui Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220909065307.1155201-1-cuigaosheng1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3bf9a6926798..a9015cec2c39 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3591,9 +3591,6 @@ extern bool empty_inline_dir(struct inode *dir, int *has_inline_data); extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, struct ext4_dir_entry_2 **parent_de, int *retval); -extern int ext4_inline_data_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - int *has_inline, __u64 start, __u64 len); extern void *ext4_read_inline_link(struct inode *inode); struct iomap; -- cgit v1.2.3 From 7177dd009c7c04290891e9a534cd47d1b620bd04 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sun, 11 Sep 2022 12:52:04 +0800 Subject: ext4: fix dir corruption when ext4_dx_add_entry() fails Following process may lead to fs corruption: 1. ext4_create(dir/foo) ext4_add_nondir ext4_add_entry ext4_dx_add_entry a. add_dirent_to_buf ext4_mark_inode_dirty ext4_handle_dirty_metadata // dir inode bh is recorded into journal b. ext4_append // dx_get_count(entries) == dx_get_limit(entries) ext4_bread(EXT4_GET_BLOCKS_CREATE) ext4_getblk ext4_map_blocks ext4_ext_map_blocks ext4_mb_new_blocks dquot_alloc_block dquot_alloc_space_nodirty inode_add_bytes // update dir's i_blocks ext4_ext_insert_extent ext4_ext_dirty // record extent bh into journal ext4_handle_dirty_metadata(bh) // record new block into journal inode->i_size += inode->i_sb->s_blocksize // new size(in mem) c. ext4_handle_dirty_dx_node(bh2) // record dir's new block(dx_node) into journal d. ext4_handle_dirty_dx_node((frame - 1)->bh) e. ext4_handle_dirty_dx_node(frame->bh) f. do_split // ret err! g. add_dirent_to_buf ext4_mark_inode_dirty(dir) // update raw_inode on disk(skipped) 2. fsck -a /dev/sdb drop last block(dx_node) which beyonds dir's i_size. /dev/sdb: recovering journal /dev/sdb contains a file system with errors, check forced. /dev/sdb: Inode 12, end of extent exceeds allowed value (logical block 128, physical block 3938, len 1) 3. fsck -fn /dev/sdb dx_node->entry[i].blk > dir->i_size Pass 2: Checking directory structure Problem in HTREE directory inode 12 (/dir): bad block number 128. Clear HTree index? no Problem in HTREE directory inode 12: block #3 has invalid depth (2) Problem in HTREE directory inode 12: block #3 has bad max hash Problem in HTREE directory inode 12: block #3 not referenced Fix it by marking inode dirty directly inside ext4_append(). Fetch a reproducer in [Link]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216466 Cc: stable@vger.kernel.org Signed-off-by: Zhihao Cheng Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220911045204.516460-1-chengzhihao1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index bc2e0612ec32..4183a4cb4a21 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -85,15 +85,20 @@ static struct buffer_head *ext4_append(handle_t *handle, return bh; inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; + err = ext4_mark_inode_dirty(handle, inode); + if (err) + goto out; BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); - if (err) { - brelse(bh); - ext4_std_error(inode->i_sb, err); - return ERR_PTR(err); - } + if (err) + goto out; return bh; + +out: + brelse(bh); + ext4_std_error(inode->i_sb, err); + return ERR_PTR(err); } static int ext4_dx_csum_verify(struct inode *inode, -- cgit v1.2.3 From ccbf8eeb39f2ff00b54726a2b20b35d788c4ecb5 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 14 Sep 2022 18:08:59 +0800 Subject: ext4: fix miss release buffer head in ext4_fc_write_inode In 'ext4_fc_write_inode' function first call 'ext4_get_inode_loc' get 'iloc', after use it miss release 'iloc.bh'. So just release 'iloc.bh' before 'ext4_fc_write_inode' return. Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220914100859.1415196-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 2af962cbb835..b7414a5812f6 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -874,22 +874,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); + ret = -ECANCELED; dst = ext4_fc_reserve_space(inode->i_sb, sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); if (!dst) - return -ECANCELED; + goto err; if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) - return -ECANCELED; + goto err; dst += sizeof(tl); if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) - return -ECANCELED; + goto err; dst += sizeof(fc_inode); if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc), inode_len, crc)) - return -ECANCELED; - - return 0; + goto err; + ret = 0; +err: + brelse(iloc.bh); + return ret; } /* -- cgit v1.2.3 From b7b80a35fb51319223e1fbf84128b8e5ebb91f86 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 16 Sep 2022 16:38:35 +0800 Subject: ext4: factor out ext4_fc_disabled() Factor out ext4_fc_disabled(). No functional change. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220916083836.388347-2-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index b7414a5812f6..eadab945b856 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -229,6 +229,12 @@ __releases(&EXT4_SB(inode->i_sb)->s_fc_lock) finish_wait(wq, &wait.wq_entry); } +static bool ext4_fc_disabled(struct super_block *sb) +{ + return (!test_opt2(sb, JOURNAL_FAST_COMMIT) || + (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)); +} + /* * Inform Ext4's fast about start of an inode update * @@ -240,8 +246,7 @@ void ext4_fc_start_update(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; restart: @@ -265,8 +270,7 @@ void ext4_fc_stop_update(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; if (atomic_dec_and_test(&ei->i_fc_updates)) @@ -283,8 +287,7 @@ void ext4_fc_del(struct inode *inode) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_fc_dentry_update *fc_dentry; - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; restart: @@ -337,8 +340,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl struct ext4_sb_info *sbi = EXT4_SB(sb); tid_t tid; - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(sb)) return; ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); @@ -493,10 +495,8 @@ void __ext4_fc_track_unlink(handle_t *handle, void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) { struct inode *inode = d_inode(dentry); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) @@ -522,10 +522,8 @@ void __ext4_fc_track_link(handle_t *handle, void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) { struct inode *inode = d_inode(dentry); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) @@ -551,10 +549,8 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) { struct inode *inode = d_inode(dentry); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) @@ -576,7 +572,6 @@ static int __track_inode(struct inode *inode, void *arg, bool update) void ext4_fc_track_inode(handle_t *handle, struct inode *inode) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int ret; if (S_ISDIR(inode->i_mode)) @@ -588,8 +583,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) return; } - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) @@ -634,15 +628,13 @@ static int __track_range(struct inode *inode, void *arg, bool update) void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, ext4_lblk_t end) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct __track_range_args args; int ret; if (S_ISDIR(inode->i_mode)) return; - if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) || - (sbi->s_mount_state & EXT4_FC_REPLAY)) + if (ext4_fc_disabled(inode->i_sb)) return; if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) -- cgit v1.2.3 From e64e6ca90913b0dfe14bf7d529df0753a6746e23 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 16 Sep 2022 16:38:36 +0800 Subject: ext4: adjust fast commit disable judgement order in ext4_fc_track_inode If fastcommit is already disabled, there isn't need to mark inode ineligible. So move 'ext4_fc_disabled()' judgement bofore 'ext4_should_journal_data(inode)' judgement which can avoid to do meaningless judgement. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220916083836.388347-3-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index eadab945b856..9217a588afd1 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -577,15 +577,15 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) if (S_ISDIR(inode->i_mode)) return; + if (ext4_fc_disabled(inode->i_sb)) + return; + if (ext4_should_journal_data(inode)) { ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); return; } - if (ext4_fc_disabled(inode->i_sb)) - return; - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) return; -- cgit v1.2.3 From 43bd6f1b49b61f43de4d4e33661b8dbe8c911f14 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:12 +0800 Subject: ext4: goto right label 'failed_mount3a' Before these two branches neither loaded the journal nor created the xattr cache. So the right label to goto is 'failed_mount3a'. Although this did not cause any issues because the error handler validated if the pointer is null. However this still made me confused when reading the code. So it's still worth to modify to goto the right label. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-2-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4eb18c4c52d7..016b3410e915 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5053,30 +5053,30 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); - goto failed_mount_wq; + goto failed_mount3a; } else { /* Nojournal mode, all journal mount options are illegal */ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "can't mount with " "journal_checksum, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ext4_msg(sb, KERN_ERR, "can't mount with " "journal_async_commit, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { ext4_msg(sb, KERN_ERR, "can't mount with " "commit=%lu, fs mounted w/o journal", sbi->s_commit_interval / HZ); - goto failed_mount_wq; + goto failed_mount3a; } if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { ext4_msg(sb, KERN_ERR, "can't mount with " "data=, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); -- cgit v1.2.3 From a5fc51193507e10a0733c40282e71216dd0486ff Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:13 +0800 Subject: ext4: remove cantfind_ext4 error handler The 'cantfind_ext4' error handler is just a error msg print and then goto failed_mount. This two level goto makes the code complex and not easy to read. The only benefit is that is saves a little bit code. However some branches can merge and some branches dot not even need it. So do some refactor and remove it. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-3-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 016b3410e915..34a261f6b47e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4344,8 +4344,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) es = (struct ext4_super_block *) (bh->b_data + offset); sbi->s_es = es; sb->s_magic = le16_to_cpu(es->s_magic); - if (sb->s_magic != EXT4_SUPER_MAGIC) - goto cantfind_ext4; + if (sb->s_magic != EXT4_SUPER_MAGIC) { + if (!silent) + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); + goto failed_mount; + } + sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); /* Warn if metadata_csum and gdt_csum are both set. */ @@ -4358,8 +4362,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (!ext4_verify_csum_type(sb, es)) { ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "unknown checksum algorithm."); - silent = 1; - goto cantfind_ext4; + goto failed_mount; } ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, ext4_orphan_file_block_trigger); @@ -4377,9 +4380,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (!ext4_superblock_csum_verify(sb, es)) { ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); - silent = 1; ret = -EFSBADCRC; - goto cantfind_ext4; + goto failed_mount; } /* Precompute checksum seed for all metadata */ @@ -4772,8 +4774,11 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); - if (sbi->s_inodes_per_block == 0) - goto cantfind_ext4; + if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) { + if (!silent) + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); + goto failed_mount; + } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", @@ -4870,9 +4875,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount; } - if (EXT4_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext4; - /* check blocks count against device size */ blocks_count = sb_bdev_nr_blocks(sb); if (blocks_count && ext4_blocks_count(es) > blocks_count) { @@ -5382,11 +5384,6 @@ no_journal: return 0; -cantfind_ext4: - if (!silent) - ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); - goto failed_mount; - failed_mount9: ext4_release_orphan_info(sb); failed_mount8: -- cgit v1.2.3 From 5f6d662d12b889ab91cc25fb19afa5699fb05bcb Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:14 +0800 Subject: ext4: factor out ext4_set_def_opts() Factor out ext4_set_def_opts(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-4-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 105 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 49 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 34a261f6b47e..8d890672e5de 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4282,6 +4282,61 @@ err_out: return NULL; } +static void ext4_set_def_opts(struct super_block *sb, + struct ext4_super_block *es) +{ + unsigned long def_mount_opts; + + /* Set defaults before we parse the mount options */ + def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + set_opt(sb, INIT_INODE_TABLE); + if (def_mount_opts & EXT4_DEFM_DEBUG) + set_opt(sb, DEBUG); + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) + set_opt(sb, GRPID); + if (def_mount_opts & EXT4_DEFM_UID16) + set_opt(sb, NO_UID32); + /* xattr user namespace & acls are now defaulted on */ + set_opt(sb, XATTR_USER); +#ifdef CONFIG_EXT4_FS_POSIX_ACL + set_opt(sb, POSIX_ACL); +#endif + if (ext4_has_feature_fast_commit(sb)) + set_opt2(sb, JOURNAL_FAST_COMMIT); + /* don't forget to enable journal_csum when metadata_csum is enabled. */ + if (ext4_has_metadata_csum(sb)) + set_opt(sb, JOURNAL_CHECKSUM); + + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) + set_opt(sb, JOURNAL_DATA); + else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) + set_opt(sb, ORDERED_DATA); + else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) + set_opt(sb, WRITEBACK_DATA); + + if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC) + set_opt(sb, ERRORS_PANIC); + else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE) + set_opt(sb, ERRORS_CONT); + else + set_opt(sb, ERRORS_RO); + /* block_validity enabled by default; disable with noblock_validity */ + set_opt(sb, BLOCK_VALIDITY); + if (def_mount_opts & EXT4_DEFM_DISCARD) + set_opt(sb, DISCARD); + + if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) + set_opt(sb, BARRIER); + + /* + * enable delayed allocation by default + * Use -o nodelalloc to turn it off + */ + if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && + ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) + set_opt(sb, DELALLOC); +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4291,7 +4346,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_fsblk_t block; ext4_fsblk_t logical_sb_block; unsigned long offset = 0; - unsigned long def_mount_opts; struct inode *root; int ret = -ENOMEM; int blocksize, clustersize; @@ -4391,43 +4445,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); - /* Set defaults before we parse the mount options */ - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - set_opt(sb, INIT_INODE_TABLE); - if (def_mount_opts & EXT4_DEFM_DEBUG) - set_opt(sb, DEBUG); - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) - set_opt(sb, GRPID); - if (def_mount_opts & EXT4_DEFM_UID16) - set_opt(sb, NO_UID32); - /* xattr user namespace & acls are now defaulted on */ - set_opt(sb, XATTR_USER); -#ifdef CONFIG_EXT4_FS_POSIX_ACL - set_opt(sb, POSIX_ACL); -#endif - if (ext4_has_feature_fast_commit(sb)) - set_opt2(sb, JOURNAL_FAST_COMMIT); - /* don't forget to enable journal_csum when metadata_csum is enabled. */ - if (ext4_has_metadata_csum(sb)) - set_opt(sb, JOURNAL_CHECKSUM); - - if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) - set_opt(sb, JOURNAL_DATA); - else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) - set_opt(sb, ORDERED_DATA); - else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) - set_opt(sb, WRITEBACK_DATA); - - if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) - set_opt(sb, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) - set_opt(sb, ERRORS_CONT); - else - set_opt(sb, ERRORS_RO); - /* block_validity enabled by default; disable with noblock_validity */ - set_opt(sb, BLOCK_VALIDITY); - if (def_mount_opts & EXT4_DEFM_DISCARD) - set_opt(sb, DISCARD); + ext4_set_def_opts(sb, es); sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); @@ -4435,17 +4453,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; - if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) - set_opt(sb, BARRIER); - - /* - * enable delayed allocation by default - * Use -o nodelalloc to turn it off - */ - if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && - ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) - set_opt(sb, DELALLOC); - /* * set default s_li_wait_mult for lazyinit, for the case there is * no mount option specified. -- cgit v1.2.3 From 4a8557b094671de1c986f2764cf4d1965541dda7 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:15 +0800 Subject: ext4: factor out ext4_handle_clustersize() Factor out ext4_handle_clustersize(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-5-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 110 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 49 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8d890672e5de..b3900bd0657d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4337,6 +4337,64 @@ static void ext4_set_def_opts(struct super_block *sb, set_opt(sb, DELALLOC); } +static int ext4_handle_clustersize(struct super_block *sb, int blocksize) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int clustersize; + + /* Handle clustersize */ + clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); + if (ext4_has_feature_bigalloc(sb)) { + if (clustersize < blocksize) { + ext4_msg(sb, KERN_ERR, + "cluster size (%d) smaller than " + "block size (%d)", clustersize, blocksize); + return -EINVAL; + } + sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - + le32_to_cpu(es->s_log_block_size); + sbi->s_clusters_per_group = + le32_to_cpu(es->s_clusters_per_group); + if (sbi->s_clusters_per_group > blocksize * 8) { + ext4_msg(sb, KERN_ERR, + "#clusters per group too big: %lu", + sbi->s_clusters_per_group); + return -EINVAL; + } + if (sbi->s_blocks_per_group != + (sbi->s_clusters_per_group * (clustersize / blocksize))) { + ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " + "clusters per group (%lu) inconsistent", + sbi->s_blocks_per_group, + sbi->s_clusters_per_group); + return -EINVAL; + } + } else { + if (clustersize != blocksize) { + ext4_msg(sb, KERN_ERR, + "fragment/cluster size (%d) != " + "block size (%d)", clustersize, blocksize); + return -EINVAL; + } + if (sbi->s_blocks_per_group > blocksize * 8) { + ext4_msg(sb, KERN_ERR, + "#blocks per group too big: %lu", + sbi->s_blocks_per_group); + return -EINVAL; + } + sbi->s_clusters_per_group = sbi->s_blocks_per_group; + sbi->s_cluster_bits = 0; + } + sbi->s_cluster_ratio = clustersize / blocksize; + + /* Do we have standard group size of clustersize * 8 blocks ? */ + if (sbi->s_blocks_per_group == clustersize << 3) + set_opt2(sb, STD_GROUP_SIZE); + + return 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4348,7 +4406,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) unsigned long offset = 0; struct inode *root; int ret = -ENOMEM; - int blocksize, clustersize; + int blocksize; unsigned int db_count; unsigned int i; int needs_recovery, has_huge_files; @@ -4821,54 +4879,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) } } - /* Handle clustersize */ - clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); - if (ext4_has_feature_bigalloc(sb)) { - if (clustersize < blocksize) { - ext4_msg(sb, KERN_ERR, - "cluster size (%d) smaller than " - "block size (%d)", clustersize, blocksize); - goto failed_mount; - } - sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - - le32_to_cpu(es->s_log_block_size); - sbi->s_clusters_per_group = - le32_to_cpu(es->s_clusters_per_group); - if (sbi->s_clusters_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#clusters per group too big: %lu", - sbi->s_clusters_per_group); - goto failed_mount; - } - if (sbi->s_blocks_per_group != - (sbi->s_clusters_per_group * (clustersize / blocksize))) { - ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " - "clusters per group (%lu) inconsistent", - sbi->s_blocks_per_group, - sbi->s_clusters_per_group); - goto failed_mount; - } - } else { - if (clustersize != blocksize) { - ext4_msg(sb, KERN_ERR, - "fragment/cluster size (%d) != " - "block size (%d)", clustersize, blocksize); - goto failed_mount; - } - if (sbi->s_blocks_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#blocks per group too big: %lu", - sbi->s_blocks_per_group); - goto failed_mount; - } - sbi->s_clusters_per_group = sbi->s_blocks_per_group; - sbi->s_cluster_bits = 0; - } - sbi->s_cluster_ratio = clustersize / blocksize; - - /* Do we have standard group size of clustersize * 8 blocks ? */ - if (sbi->s_blocks_per_group == clustersize << 3) - set_opt2(sb, STD_GROUP_SIZE); + if (ext4_handle_clustersize(sb, blocksize)) + goto failed_mount; /* * Test whether we have more sectors than will fit in sector_t, -- cgit v1.2.3 From f7314a67322ef04a4df51fbf6dc2b77fe71ef9ad Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:16 +0800 Subject: ext4: factor out ext4_fast_commit_init() Factor out ext4_fast_commit_init(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-6-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b3900bd0657d..2325cfe23e4a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4395,6 +4395,30 @@ static int ext4_handle_clustersize(struct super_block *sb, int blocksize) return 0; } +static void ext4_fast_commit_init(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + /* Initialize fast commit stuff */ + atomic_set(&sbi->s_fc_subtid, 0); + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); + sbi->s_fc_bytes = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + sbi->s_fc_ineligible_tid = 0; + spin_lock_init(&sbi->s_fc_lock); + memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); + sbi->s_fc_replay_state.fc_regions = NULL; + sbi->s_fc_replay_state.fc_regions_size = 0; + sbi->s_fc_replay_state.fc_regions_used = 0; + sbi->s_fc_replay_state.fc_regions_valid = 0; + sbi->s_fc_replay_state.fc_modified_inodes = NULL; + sbi->s_fc_replay_state.fc_modified_inodes_size = 0; + sbi->s_fc_replay_state.fc_modified_inodes_used = 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -5033,24 +5057,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); - /* Initialize fast commit stuff */ - atomic_set(&sbi->s_fc_subtid, 0); - INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); - INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); - INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); - INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); - sbi->s_fc_bytes = 0; - ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - sbi->s_fc_ineligible_tid = 0; - spin_lock_init(&sbi->s_fc_lock); - memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); - sbi->s_fc_replay_state.fc_regions = NULL; - sbi->s_fc_replay_state.fc_regions_size = 0; - sbi->s_fc_replay_state.fc_regions_used = 0; - sbi->s_fc_replay_state.fc_regions_valid = 0; - sbi->s_fc_replay_state.fc_modified_inodes = NULL; - sbi->s_fc_replay_state.fc_modified_inodes_size = 0; - sbi->s_fc_replay_state.fc_modified_inodes_used = 0; + ext4_fast_commit_init(sb); sb->s_root = NULL; -- cgit v1.2.3 From 0e495f7cc3f91d8473f16045c61c60bffd95639c Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:17 +0800 Subject: ext4: factor out ext4_inode_info_init() Factor out ext4_inode_info_init(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-7-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 137 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 75 insertions(+), 62 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2325cfe23e4a..2f7818bac768 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4419,6 +4419,79 @@ static void ext4_fast_commit_init(struct super_block *sb) sbi->s_fc_replay_state.fc_modified_inodes_used = 0; } +static int ext4_inode_info_init(struct super_block *sb, + struct ext4_super_block *es, + int blocksize) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { + sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; + } else { + sbi->s_inode_size = le16_to_cpu(es->s_inode_size); + sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + return -EINVAL; + } + if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || + (!is_power_of_2(sbi->s_inode_size)) || + (sbi->s_inode_size > blocksize)) { + ext4_msg(sb, KERN_ERR, + "unsupported inode size: %d", + sbi->s_inode_size); + ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); + return -EINVAL; + } + /* + * i_atime_extra is the last extra field available for + * [acm]times in struct ext4_inode. Checking for that + * field should suffice to ensure we have extra space + * for all three. + */ + if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) + + sizeof(((struct ext4_inode *)0)->i_atime_extra)) { + sb->s_time_gran = 1; + sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX; + } else { + sb->s_time_gran = NSEC_PER_SEC; + sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX; + } + sb->s_time_min = EXT4_TIMESTAMP_MIN; + } + + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (ext4_has_feature_extra_isize(sb)) { + unsigned v, max = (sbi->s_inode_size - + EXT4_GOOD_OLD_INODE_SIZE); + + v = le16_to_cpu(es->s_want_extra_isize); + if (v > max) { + ext4_msg(sb, KERN_ERR, + "bad s_want_extra_isize: %d", v); + return -EINVAL; + } + if (sbi->s_want_extra_isize < v) + sbi->s_want_extra_isize = v; + + v = le16_to_cpu(es->s_min_extra_isize); + if (v > max) { + ext4_msg(sb, KERN_ERR, + "bad s_min_extra_isize: %d", v); + return -EINVAL; + } + if (sbi->s_want_extra_isize < v) + sbi->s_want_extra_isize = v; + } + } + + return 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4561,68 +4634,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (blocksize == PAGE_SIZE) set_opt(sb, DIOREAD_NOLOCK); - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { - sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; - sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); - if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { - ext4_msg(sb, KERN_ERR, "invalid first ino: %u", - sbi->s_first_ino); - goto failed_mount; - } - if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || - (!is_power_of_2(sbi->s_inode_size)) || - (sbi->s_inode_size > blocksize)) { - ext4_msg(sb, KERN_ERR, - "unsupported inode size: %d", - sbi->s_inode_size); - ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); - goto failed_mount; - } - /* - * i_atime_extra is the last extra field available for - * [acm]times in struct ext4_inode. Checking for that - * field should suffice to ensure we have extra space - * for all three. - */ - if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) + - sizeof(((struct ext4_inode *)0)->i_atime_extra)) { - sb->s_time_gran = 1; - sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX; - } else { - sb->s_time_gran = NSEC_PER_SEC; - sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX; - } - sb->s_time_min = EXT4_TIMESTAMP_MIN; - } - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - if (ext4_has_feature_extra_isize(sb)) { - unsigned v, max = (sbi->s_inode_size - - EXT4_GOOD_OLD_INODE_SIZE); - - v = le16_to_cpu(es->s_want_extra_isize); - if (v > max) { - ext4_msg(sb, KERN_ERR, - "bad s_want_extra_isize: %d", v); - goto failed_mount; - } - if (sbi->s_want_extra_isize < v) - sbi->s_want_extra_isize = v; - - v = le16_to_cpu(es->s_min_extra_isize); - if (v > max) { - ext4_msg(sb, KERN_ERR, - "bad s_min_extra_isize: %d", v); - goto failed_mount; - } - if (sbi->s_want_extra_isize < v) - sbi->s_want_extra_isize = v; - } - } + if (ext4_inode_info_init(sb, es, blocksize)) + goto failed_mount; err = parse_apply_sb_mount_options(sb, ctx); if (err < 0) -- cgit v1.2.3 From 39c135b08c8220dd1dca885e73eae3f3de5fa082 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:18 +0800 Subject: ext4: factor out ext4_encoding_init() Factor out ext4_encoding_init(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220916141527.1012715-8-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 86 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 36 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2f7818bac768..3a480a20de4d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4492,6 +4492,54 @@ static int ext4_inode_info_init(struct super_block *sb, return 0; } +#if IS_ENABLED(CONFIG_UNICODE) +static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es) +{ + const struct ext4_sb_encodings *encoding_info; + struct unicode_map *encoding; + __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); + + if (!ext4_has_feature_casefold(sb) || sb->s_encoding) + return 0; + + encoding_info = ext4_sb_read_encoding(es); + if (!encoding_info) { + ext4_msg(sb, KERN_ERR, + "Encoding requested by superblock is unknown"); + return -EINVAL; + } + + encoding = utf8_load(encoding_info->version); + if (IS_ERR(encoding)) { + ext4_msg(sb, KERN_ERR, + "can't mount with superblock charset: %s-%u.%u.%u " + "not supported by the kernel. flags: 0x%x.", + encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), + encoding_flags); + return -EINVAL; + } + ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " + "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), + encoding_flags); + + sb->s_encoding = encoding; + sb->s_encoding_flags = encoding_flags; + + return 0; +} +#else +static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es) +{ + return 0; +} +#endif + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4649,42 +4697,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_apply_options(fc, sb); -#if IS_ENABLED(CONFIG_UNICODE) - if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { - const struct ext4_sb_encodings *encoding_info; - struct unicode_map *encoding; - __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); - - encoding_info = ext4_sb_read_encoding(es); - if (!encoding_info) { - ext4_msg(sb, KERN_ERR, - "Encoding requested by superblock is unknown"); - goto failed_mount; - } - - encoding = utf8_load(encoding_info->version); - if (IS_ERR(encoding)) { - ext4_msg(sb, KERN_ERR, - "can't mount with superblock charset: %s-%u.%u.%u " - "not supported by the kernel. flags: 0x%x.", - encoding_info->name, - unicode_major(encoding_info->version), - unicode_minor(encoding_info->version), - unicode_rev(encoding_info->version), - encoding_flags); - goto failed_mount; - } - ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " - "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, - unicode_major(encoding_info->version), - unicode_minor(encoding_info->version), - unicode_rev(encoding_info->version), - encoding_flags); - - sb->s_encoding = encoding; - sb->s_encoding_flags = encoding_flags; - } -#endif + if (ext4_encoding_init(sb, es)) + goto failed_mount; if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n"); -- cgit v1.2.3 From b26458d151019ec279a2622ac6f15b5b913252ff Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:19 +0800 Subject: ext4: factor out ext4_init_metadata_csum() Factor out ext4_init_metadata_csum(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-9-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 83 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 37 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3a480a20de4d..dc32f078feae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4540,6 +4540,50 @@ static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_b } #endif +static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + /* Warn if metadata_csum and gdt_csum are both set. */ + if (ext4_has_feature_metadata_csum(sb) && + ext4_has_feature_gdt_csum(sb)) + ext4_warning(sb, "metadata_csum and uninit_bg are " + "redundant flags; please run fsck."); + + /* Check for a known checksum algorithm */ + if (!ext4_verify_csum_type(sb, es)) { + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " + "unknown checksum algorithm."); + return -EINVAL; + } + ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, + ext4_orphan_file_block_trigger); + + /* Load the checksum driver */ + sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(sbi->s_chksum_driver)) { + int ret = PTR_ERR(sbi->s_chksum_driver); + ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); + sbi->s_chksum_driver = NULL; + return ret; + } + + /* Check superblock checksum */ + if (!ext4_superblock_csum_verify(sb, es)) { + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " + "invalid superblock checksum. Run e2fsck?"); + return -EFSBADCRC; + } + + /* Precompute checksum seed for all metadata */ + if (ext4_has_feature_csum_seed(sb)) + sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); + else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) + sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, + sizeof(es->s_uuid)); + return 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4609,44 +4653,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); - /* Warn if metadata_csum and gdt_csum are both set. */ - if (ext4_has_feature_metadata_csum(sb) && - ext4_has_feature_gdt_csum(sb)) - ext4_warning(sb, "metadata_csum and uninit_bg are " - "redundant flags; please run fsck."); - - /* Check for a known checksum algorithm */ - if (!ext4_verify_csum_type(sb, es)) { - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " - "unknown checksum algorithm."); - goto failed_mount; - } - ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, - ext4_orphan_file_block_trigger); - - /* Load the checksum driver */ - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - ret = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto failed_mount; - } - - /* Check superblock checksum */ - if (!ext4_superblock_csum_verify(sb, es)) { - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " - "invalid superblock checksum. Run e2fsck?"); - ret = -EFSBADCRC; + err = ext4_init_metadata_csum(sb, es); + if (err) goto failed_mount; - } - - /* Precompute checksum seed for all metadata */ - if (ext4_has_feature_csum_seed(sb)) - sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); - else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) - sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, - sizeof(es->s_uuid)); ext4_set_def_opts(sb, es); -- cgit v1.2.3 From d7f3542b3219622308079dca521851be9b924c05 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:20 +0800 Subject: ext4: factor out ext4_check_feature_compatibility() Factor out ext4_check_feature_compatibility(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-10-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 144 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 77 insertions(+), 67 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dc32f078feae..972e287160c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4584,6 +4584,82 @@ static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_blo return 0; } +static int ext4_check_feature_compatibility(struct super_block *sb, + struct ext4_super_block *es, + int silent) +{ + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && + (ext4_has_compat_features(sb) || + ext4_has_ro_compat_features(sb) || + ext4_has_incompat_features(sb))) + ext4_msg(sb, KERN_WARNING, + "feature flags set on rev 0 fs, " + "running e2fsck is recommended"); + + if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { + set_opt2(sb, HURD_COMPAT); + if (ext4_has_feature_64bit(sb)) { + ext4_msg(sb, KERN_ERR, + "The Hurd can't support 64-bit file systems"); + return -EINVAL; + } + + /* + * ea_inode feature uses l_i_version field which is not + * available in HURD_COMPAT mode. + */ + if (ext4_has_feature_ea_inode(sb)) { + ext4_msg(sb, KERN_ERR, + "ea_inode feature is not supported for Hurd"); + return -EINVAL; + } + } + + if (IS_EXT2_SB(sb)) { + if (ext2_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext2 file system " + "using the ext4 subsystem"); + else { + /* + * If we're probing be silent, if this looks like + * it's actually an ext[34] filesystem. + */ + if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) + return -EINVAL; + ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " + "to feature incompatibilities"); + return -EINVAL; + } + } + + if (IS_EXT3_SB(sb)) { + if (ext3_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext3 file system " + "using the ext4 subsystem"); + else { + /* + * If we're probing be silent, if this looks like + * it's actually an ext4 filesystem. + */ + if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) + return -EINVAL; + ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " + "to feature incompatibilities"); + return -EINVAL; + } + } + + /* + * Check feature flags regardless of the revision level, since we + * previously didn't change the revision level when setting the flags, + * so there is a chance incompat flags are set on a rev 0 filesystem. + */ + if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) + return -EINVAL; + + return 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4741,73 +4817,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* i_version is always enabled now */ sb->s_flags |= SB_I_VERSION; - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && - (ext4_has_compat_features(sb) || - ext4_has_ro_compat_features(sb) || - ext4_has_incompat_features(sb))) - ext4_msg(sb, KERN_WARNING, - "feature flags set on rev 0 fs, " - "running e2fsck is recommended"); - - if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { - set_opt2(sb, HURD_COMPAT); - if (ext4_has_feature_64bit(sb)) { - ext4_msg(sb, KERN_ERR, - "The Hurd can't support 64-bit file systems"); - goto failed_mount; - } - - /* - * ea_inode feature uses l_i_version field which is not - * available in HURD_COMPAT mode. - */ - if (ext4_has_feature_ea_inode(sb)) { - ext4_msg(sb, KERN_ERR, - "ea_inode feature is not supported for Hurd"); - goto failed_mount; - } - } - - if (IS_EXT2_SB(sb)) { - if (ext2_feature_set_ok(sb)) - ext4_msg(sb, KERN_INFO, "mounting ext2 file system " - "using the ext4 subsystem"); - else { - /* - * If we're probing be silent, if this looks like - * it's actually an ext[34] filesystem. - */ - if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) - goto failed_mount; - ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " - "to feature incompatibilities"); - goto failed_mount; - } - } - - if (IS_EXT3_SB(sb)) { - if (ext3_feature_set_ok(sb)) - ext4_msg(sb, KERN_INFO, "mounting ext3 file system " - "using the ext4 subsystem"); - else { - /* - * If we're probing be silent, if this looks like - * it's actually an ext4 filesystem. - */ - if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) - goto failed_mount; - ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " - "to feature incompatibilities"); - goto failed_mount; - } - } - - /* - * Check feature flags regardless of the revision level, since we - * previously didn't change the revision level when setting the flags, - * so there is a chance incompat flags are set on a rev 0 filesystem. - */ - if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) + if (ext4_check_feature_compatibility(sb, es, silent)) goto failed_mount; if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { -- cgit v1.2.3 From bc62dbf9145746c442b5b4a960e842ff7e8fdd2e Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:21 +0800 Subject: ext4: factor out ext4_geometry_check() Factor out ext4_geometry_check(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-11-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 111 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 50 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 972e287160c8..ee0ca696c05f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4660,6 +4660,66 @@ static int ext4_check_feature_compatibility(struct super_block *sb, return 0; } +static int ext4_geometry_check(struct super_block *sb, + struct ext4_super_block *es) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + __u64 blocks_count; + + /* check blocks count against device size */ + blocks_count = sb_bdev_nr_blocks(sb); + if (blocks_count && ext4_blocks_count(es) > blocks_count) { + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " + "exceeds size of device (%llu blocks)", + ext4_blocks_count(es), blocks_count); + return -EINVAL; + } + + /* + * It makes no sense for the first data block to be beyond the end + * of the filesystem. + */ + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block %u is beyond end of filesystem (%llu)", + le32_to_cpu(es->s_first_data_block), + ext4_blocks_count(es)); + return -EINVAL; + } + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && + (sbi->s_cluster_ratio == 1)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block is 0 with a 1k block and cluster size"); + return -EINVAL; + } + + blocks_count = (ext4_blocks_count(es) - + le32_to_cpu(es->s_first_data_block) + + EXT4_BLOCKS_PER_GROUP(sb) - 1); + do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); + if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " + "(block count %llu, first data block %u, " + "blocks per group %lu)", blocks_count, + ext4_blocks_count(es), + le32_to_cpu(es->s_first_data_block), + EXT4_BLOCKS_PER_GROUP(sb)); + return -EINVAL; + } + sbi->s_groups_count = blocks_count; + sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + return -EINVAL; + } + + return 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; @@ -4675,7 +4735,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) unsigned int db_count; unsigned int i; int needs_recovery, has_huge_files; - __u64 blocks_count; int err = 0; ext4_group_t first_not_zeroed; struct ext4_fs_context *ctx = fc->fs_private; @@ -4964,57 +5023,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount; } - /* check blocks count against device size */ - blocks_count = sb_bdev_nr_blocks(sb); - if (blocks_count && ext4_blocks_count(es) > blocks_count) { - ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " - "exceeds size of device (%llu blocks)", - ext4_blocks_count(es), blocks_count); + if (ext4_geometry_check(sb, es)) goto failed_mount; - } - /* - * It makes no sense for the first data block to be beyond the end - * of the filesystem. - */ - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { - ext4_msg(sb, KERN_WARNING, "bad geometry: first data " - "block %u is beyond end of filesystem (%llu)", - le32_to_cpu(es->s_first_data_block), - ext4_blocks_count(es)); - goto failed_mount; - } - if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && - (sbi->s_cluster_ratio == 1)) { - ext4_msg(sb, KERN_WARNING, "bad geometry: first data " - "block is 0 with a 1k block and cluster size"); - goto failed_mount; - } - - blocks_count = (ext4_blocks_count(es) - - le32_to_cpu(es->s_first_data_block) + - EXT4_BLOCKS_PER_GROUP(sb) - 1); - do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); - if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " - "(block count %llu, first data block %u, " - "blocks per group %lu)", blocks_count, - ext4_blocks_count(es), - le32_to_cpu(es->s_first_data_block), - EXT4_BLOCKS_PER_GROUP(sb)); - goto failed_mount; - } - sbi->s_groups_count = blocks_count; - sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, - (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != - le32_to_cpu(es->s_inodes_count)) { - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", - le32_to_cpu(es->s_inodes_count), - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); - ret = -EINVAL; - goto failed_mount; - } db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { -- cgit v1.2.3 From a4e6a511d7de5d2afa4e8138146c29b926c6a449 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:22 +0800 Subject: ext4: factor out ext4_group_desc_init() and ext4_group_desc_free() Factor out ext4_group_desc_init() and ext4_group_desc_free(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-12-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 143 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 59 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ee0ca696c05f..1057149e0826 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4720,9 +4720,89 @@ static int ext4_geometry_check(struct super_block *sb, return 0; } +static void ext4_group_desc_free(struct ext4_sb_info *sbi) +{ + struct buffer_head **group_desc; + int i; + + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); +} + +static int ext4_group_desc_init(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t logical_sb_block, + ext4_group_t *first_not_zeroed) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned int db_count; + ext4_fsblk_t block; + int ret; + int i; + + db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / + EXT4_DESC_PER_BLOCK(sb); + if (ext4_has_feature_meta_bg(sb)) { + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { + ext4_msg(sb, KERN_WARNING, + "first meta block group too large: %u " + "(group descriptor block count %u)", + le32_to_cpu(es->s_first_meta_bg), db_count); + return -EINVAL; + } + } + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, + sizeof(struct buffer_head *), + GFP_KERNEL)); + if (sbi->s_group_desc == NULL) { + ext4_msg(sb, KERN_ERR, "not enough memory"); + return -ENOMEM; + } + + bgl_lock_init(sbi->s_blockgroup_lock); + + /* Pre-read the descriptors into the buffer cache */ + for (i = 0; i < db_count; i++) { + block = descriptor_loc(sb, logical_sb_block, i); + ext4_sb_breadahead_unmovable(sb, block); + } + + for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + + block = descriptor_loc(sb, logical_sb_block, i); + bh = ext4_sb_bread_unmovable(sb, block); + if (IS_ERR(bh)) { + ext4_msg(sb, KERN_ERR, + "can't read group descriptor %d", i); + sbi->s_gdb_count = i; + ret = PTR_ERR(bh); + goto out; + } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); + } + sbi->s_gdb_count = db_count; + if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) { + ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); + ret = -EFSCORRUPTED; + goto out; + } + return 0; +out: + ext4_group_desc_free(sbi); + return ret; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { - struct buffer_head *bh, **group_desc; + struct buffer_head *bh; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **flex_groups; @@ -4732,7 +4812,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) struct inode *root; int ret = -ENOMEM; int blocksize; - unsigned int db_count; unsigned int i; int needs_recovery, has_huge_files; int err = 0; @@ -5026,57 +5105,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (ext4_geometry_check(sb, es)) goto failed_mount; - db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); - if (ext4_has_feature_meta_bg(sb)) { - if (le32_to_cpu(es->s_first_meta_bg) > db_count) { - ext4_msg(sb, KERN_WARNING, - "first meta block group too large: %u " - "(group descriptor block count %u)", - le32_to_cpu(es->s_first_meta_bg), db_count); - goto failed_mount; - } - } - rcu_assign_pointer(sbi->s_group_desc, - kvmalloc_array(db_count, - sizeof(struct buffer_head *), - GFP_KERNEL)); - if (sbi->s_group_desc == NULL) { - ext4_msg(sb, KERN_ERR, "not enough memory"); - ret = -ENOMEM; + err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); + if (err) goto failed_mount; - } - - bgl_lock_init(sbi->s_blockgroup_lock); - - /* Pre-read the descriptors into the buffer cache */ - for (i = 0; i < db_count; i++) { - block = descriptor_loc(sb, logical_sb_block, i); - ext4_sb_breadahead_unmovable(sb, block); - } - - for (i = 0; i < db_count; i++) { - struct buffer_head *bh; - - block = descriptor_loc(sb, logical_sb_block, i); - bh = ext4_sb_bread_unmovable(sb, block); - if (IS_ERR(bh)) { - ext4_msg(sb, KERN_ERR, - "can't read group descriptor %d", i); - db_count = i; - ret = PTR_ERR(bh); - goto failed_mount2; - } - rcu_read_lock(); - rcu_dereference(sbi->s_group_desc)[i] = bh; - rcu_read_unlock(); - } - sbi->s_gdb_count = db_count; - if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { - ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); - ret = -EFSCORRUPTED; - goto failed_mount2; - } timer_setup(&sbi->s_err_report, print_daily_error_info, 0); spin_lock_init(&sbi->s_error_lock); @@ -5520,13 +5551,7 @@ failed_mount3: flush_work(&sbi->s_error_work); del_timer_sync(&sbi->s_err_report); ext4_stop_mmpd(sbi); -failed_mount2: - rcu_read_lock(); - group_desc = rcu_dereference(sbi->s_group_desc); - for (i = 0; i < db_count; i++) - brelse(group_desc[i]); - kvfree(group_desc); - rcu_read_unlock(); + ext4_group_desc_free(sbi); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -- cgit v1.2.3 From 9c1dd22d742249cfae7bbf3680a7c188d194d3ce Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:23 +0800 Subject: ext4: factor out ext4_load_and_init_journal() This patch group the journal load and initialize code together and factor out ext4_load_and_init_journal(). This patch also removes the lable 'no_journal' which is not needed after refactor. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-13-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 157 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 88 insertions(+), 69 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1057149e0826..21affaa81b2e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4800,6 +4800,93 @@ out: return ret; } +static int ext4_load_and_init_journal(struct super_block *sb, + struct ext4_super_block *es, + struct ext4_fs_context *ctx) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; + + err = ext4_load_journal(sb, es, ctx->journal_devnum); + if (err) + return err; + + if (ext4_has_feature_64bit(sb) && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_64BIT)) { + ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); + goto out; + } + + if (!set_journal_csum_feature_set(sb)) { + ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " + "feature set"); + goto out; + } + + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { + ext4_msg(sb, KERN_ERR, + "Failed to set fast commit journal feature"); + goto out; + } + + /* We have now updated the journal if required, so we can + * validate the data journaling mode. */ + switch (test_opt(sb, DATA_FLAGS)) { + case 0: + /* No mode set, assume a default based on the journal + * capabilities: ORDERED_DATA if the journal can + * cope, else JOURNAL_DATA + */ + if (jbd2_journal_check_available_features + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { + set_opt(sb, ORDERED_DATA); + sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + } else { + set_opt(sb, JOURNAL_DATA); + sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + } + break; + + case EXT4_MOUNT_ORDERED_DATA: + case EXT4_MOUNT_WRITEBACK_DATA: + if (!jbd2_journal_check_available_features + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { + ext4_msg(sb, KERN_ERR, "Journal does not support " + "requested data journaling mode"); + goto out; + } + break; + default: + break; + } + + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && + test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit in data=ordered mode"); + goto out; + } + + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); + + sbi->s_journal->j_submit_inode_data_buffers = + ext4_journal_submit_inode_data_buffers; + sbi->s_journal->j_finish_inode_data_buffers = + ext4_journal_finish_inode_data_buffers; + + return 0; + +out: + /* flush s_error_work before journal destroy. */ + flush_work(&sbi->s_error_work); + jbd2_journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; + return err; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh; @@ -5162,7 +5249,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - err = ext4_load_journal(sb, es, ctx->journal_devnum); + err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && @@ -5200,76 +5287,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) clear_opt2(sb, JOURNAL_FAST_COMMIT); sbi->s_journal = NULL; needs_recovery = 0; - goto no_journal; } - if (ext4_has_feature_64bit(sb) && - !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_64BIT)) { - ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); - goto failed_mount_wq; - } - - if (!set_journal_csum_feature_set(sb)) { - ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " - "feature set"); - goto failed_mount_wq; - } - - if (test_opt2(sb, JOURNAL_FAST_COMMIT) && - !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { - ext4_msg(sb, KERN_ERR, - "Failed to set fast commit journal feature"); - goto failed_mount_wq; - } - - /* We have now updated the journal if required, so we can - * validate the data journaling mode. */ - switch (test_opt(sb, DATA_FLAGS)) { - case 0: - /* No mode set, assume a default based on the journal - * capabilities: ORDERED_DATA if the journal can - * cope, else JOURNAL_DATA - */ - if (jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - set_opt(sb, ORDERED_DATA); - sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; - } else { - set_opt(sb, JOURNAL_DATA); - sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; - } - break; - - case EXT4_MOUNT_ORDERED_DATA: - case EXT4_MOUNT_WRITEBACK_DATA: - if (!jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - ext4_msg(sb, KERN_ERR, "Journal does not support " - "requested data journaling mode"); - goto failed_mount_wq; - } - break; - default: - break; - } - - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && - test_opt(sb, JOURNAL_ASYNC_COMMIT)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "journal_async_commit in data=ordered mode"); - goto failed_mount_wq; - } - - set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); - - sbi->s_journal->j_submit_inode_data_buffers = - ext4_journal_submit_inode_data_buffers; - sbi->s_journal->j_finish_inode_data_buffers = - ext4_journal_finish_inode_data_buffers; - -no_journal: if (!test_opt(sb, NO_MBCACHE)) { sbi->s_ea_block_cache = ext4_xattr_create_cache(); if (!sbi->s_ea_block_cache) { -- cgit v1.2.3 From a5991e539c9a4be0bdec2e427d833196339ec7c6 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:24 +0800 Subject: ext4: factor out ext4_journal_data_mode_check() Factor out ext4_journal_data_mode_check(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220916141527.1012715-14-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 60 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 25 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 21affaa81b2e..bec7fbff9acf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4887,6 +4887,39 @@ out: return err; } +static int ext4_journal_data_mode_check(struct super_block *sb) +{ + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with " + "data=journal disables delayed allocation, " + "dioread_nolock, O_DIRECT and fast_commit support!\n"); + /* can't mount with both data=journal and dioread_nolock. */ + clear_opt(sb, DIOREAD_NOLOCK); + clear_opt2(sb, JOURNAL_FAST_COMMIT); + if (test_opt2(sb, EXPLICIT_DELALLOC)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and delalloc"); + return -EINVAL; + } + if (test_opt(sb, DAX_ALWAYS)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dax"); + return -EINVAL; + } + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "encrypted files will use data=ordered " + "instead of data journaling mode"); + } + if (test_opt(sb, DELALLOC)) + clear_opt(sb, DELALLOC); + } else { + sb->s_iflags |= SB_I_CGROUPWB; + } + + return 0; +} + static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh; @@ -5010,31 +5043,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (ext4_encoding_init(sb, es)) goto failed_mount; - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { - printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n"); - /* can't mount with both data=journal and dioread_nolock. */ - clear_opt(sb, DIOREAD_NOLOCK); - clear_opt2(sb, JOURNAL_FAST_COMMIT); - if (test_opt2(sb, EXPLICIT_DELALLOC)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and delalloc"); - goto failed_mount; - } - if (test_opt(sb, DAX_ALWAYS)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and dax"); - goto failed_mount; - } - if (ext4_has_feature_encrypt(sb)) { - ext4_msg(sb, KERN_WARNING, - "encrypted files will use data=ordered " - "instead of data journaling mode"); - } - if (test_opt(sb, DELALLOC)) - clear_opt(sb, DELALLOC); - } else { - sb->s_iflags |= SB_I_CGROUPWB; - } + if (ext4_journal_data_mode_check(sb)) + goto failed_mount; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); -- cgit v1.2.3 From a7a79c292ac3776bed20d575a96194181a397125 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:25 +0800 Subject: ext4: unify the ext4 super block loading operation Now we load the super block from the disk in two steps. First we load the super block with the default block size(EXT4_MIN_BLOCK_SIZE). Second we load the super block with the real block size. The second step is a little far from the first step. This patch move these two steps together in a new function. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220916141527.1012715-15-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 186 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 80 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bec7fbff9acf..11debba7c7d1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4920,39 +4920,21 @@ static int ext4_journal_data_mode_check(struct super_block *sb) return 0; } -static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) +static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb, + int silent) { - struct buffer_head *bh; - struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups **flex_groups; - ext4_fsblk_t block; + struct ext4_super_block *es; ext4_fsblk_t logical_sb_block; unsigned long offset = 0; - struct inode *root; - int ret = -ENOMEM; + struct buffer_head *bh; + int ret = -EINVAL; int blocksize; - unsigned int i; - int needs_recovery, has_huge_files; - int err = 0; - ext4_group_t first_not_zeroed; - struct ext4_fs_context *ctx = fc->fs_private; - int silent = fc->sb_flags & SB_SILENT; - - /* Set defaults for the variables that will be set during parsing */ - if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; - sbi->s_sectors_written_start = - part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); - - /* -EINVAL is default */ - ret = -EINVAL; blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { ext4_msg(sb, KERN_ERR, "unable to set blocksize"); - goto out_fail; + return -EINVAL; } /* @@ -4969,8 +4951,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) bh = ext4_sb_bread_unmovable(sb, logical_sb_block); if (IS_ERR(bh)) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); - ret = PTR_ERR(bh); - goto out_fail; + return PTR_ERR(bh); } /* * Note: s_es must be initialized as soon as possible because @@ -4982,9 +4963,106 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (sb->s_magic != EXT4_SUPER_MAGIC) { if (!silent) ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); - goto failed_mount; + goto out; } + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); + goto out; + } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto out; + } + + blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + /* + * If the default block size is not the same as the real block size, + * we need to reload it. + */ + if (sb->s_blocksize == blocksize) { + *lsb = logical_sb_block; + sbi->s_sbh = bh; + return 0; + } + + /* + * bh must be released before kill_bdev(), otherwise + * it won't be freed and its page also. kill_bdev() + * is called by sb_set_blocksize(). + */ + brelse(bh); + /* Validate the filesystem blocksize */ + if (!sb_set_blocksize(sb, blocksize)) { + ext4_msg(sb, KERN_ERR, "bad block size %d", + blocksize); + bh = NULL; + goto out; + } + + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; + offset = do_div(logical_sb_block, blocksize); + bh = ext4_sb_bread_unmovable(sb, logical_sb_block); + if (IS_ERR(bh)) { + ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try"); + ret = PTR_ERR(bh); + bh = NULL; + goto out; + } + es = (struct ext4_super_block *)(bh->b_data + offset); + sbi->s_es = es; + if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { + ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!"); + goto out; + } + *lsb = logical_sb_block; + sbi->s_sbh = bh; + return 0; +out: + brelse(bh); + return ret; +} + +static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) +{ + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct flex_groups **flex_groups; + ext4_fsblk_t block; + ext4_fsblk_t logical_sb_block; + struct inode *root; + int ret = -ENOMEM; + int blocksize; + unsigned int i; + int needs_recovery, has_huge_files; + int err = 0; + ext4_group_t first_not_zeroed; + struct ext4_fs_context *ctx = fc->fs_private; + int silent = fc->sb_flags & SB_SILENT; + + /* Set defaults for the variables that will be set during parsing */ + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + + sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; + sbi->s_sectors_written_start = + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); + + /* -EINVAL is default */ + ret = -EINVAL; + err = ext4_load_super(sb, &logical_sb_block, silent); + if (err) + goto out_fail; + + es = sbi->s_es; + blocksize = sb->s_blocksize; sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); err = ext4_init_metadata_csum(sb, es); @@ -5005,23 +5083,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - if (le32_to_cpu(es->s_log_block_size) > - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log block size: %u", - le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } - - blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize == PAGE_SIZE) set_opt(sb, DIOREAD_NOLOCK); @@ -5088,40 +5149,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount; } - if (sb->s_blocksize != blocksize) { - /* - * bh must be released before kill_bdev(), otherwise - * it won't be freed and its page also. kill_bdev() - * is called by sb_set_blocksize(). - */ - brelse(bh); - /* Validate the filesystem blocksize */ - if (!sb_set_blocksize(sb, blocksize)) { - ext4_msg(sb, KERN_ERR, "bad block size %d", - blocksize); - bh = NULL; - goto failed_mount; - } - - logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; - offset = do_div(logical_sb_block, blocksize); - bh = ext4_sb_bread_unmovable(sb, logical_sb_block); - if (IS_ERR(bh)) { - ext4_msg(sb, KERN_ERR, - "Can't read superblock on 2nd try"); - ret = PTR_ERR(bh); - bh = NULL; - goto failed_mount; - } - es = (struct ext4_super_block *)(bh->b_data + offset); - sbi->s_es = es; - if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - ext4_msg(sb, KERN_ERR, - "Magic mismatch, very weird!"); - goto failed_mount; - } - } - has_huge_files = ext4_has_feature_huge_file(sb); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); @@ -5158,7 +5185,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); - sbi->s_sbh = bh; sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); @@ -5595,7 +5621,7 @@ failed_mount: #endif fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */ - brelse(bh); + brelse(sbi->s_sbh); ext4_blkdev_remove(sbi); out_fail: sb->s_fs_info = NULL; -- cgit v1.2.3 From c8267c51425abd4f75d0ad07c4d3ee748908e2f8 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:26 +0800 Subject: ext4: remove useless local variable 'blocksize' Since sb->s_blocksize is now initialized at the very beginning, the local variable 'blocksize' in __ext4_fill_super() is not needed now. Remove it and use sb->s_blocksize instead. Signed-off-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220916141527.1012715-16-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 11debba7c7d1..9db5be55de40 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4337,7 +4337,7 @@ static void ext4_set_def_opts(struct super_block *sb, set_opt(sb, DELALLOC); } -static int ext4_handle_clustersize(struct super_block *sb, int blocksize) +static int ext4_handle_clustersize(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -4346,24 +4346,24 @@ static int ext4_handle_clustersize(struct super_block *sb, int blocksize) /* Handle clustersize */ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); if (ext4_has_feature_bigalloc(sb)) { - if (clustersize < blocksize) { + if (clustersize < sb->s_blocksize) { ext4_msg(sb, KERN_ERR, "cluster size (%d) smaller than " - "block size (%d)", clustersize, blocksize); + "block size (%lu)", clustersize, sb->s_blocksize); return -EINVAL; } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group); - if (sbi->s_clusters_per_group > blocksize * 8) { + if (sbi->s_clusters_per_group > sb->s_blocksize * 8) { ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu", sbi->s_clusters_per_group); return -EINVAL; } if (sbi->s_blocks_per_group != - (sbi->s_clusters_per_group * (clustersize / blocksize))) { + (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) { ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " "clusters per group (%lu) inconsistent", sbi->s_blocks_per_group, @@ -4371,13 +4371,13 @@ static int ext4_handle_clustersize(struct super_block *sb, int blocksize) return -EINVAL; } } else { - if (clustersize != blocksize) { + if (clustersize != sb->s_blocksize) { ext4_msg(sb, KERN_ERR, "fragment/cluster size (%d) != " - "block size (%d)", clustersize, blocksize); + "block size (%lu)", clustersize, sb->s_blocksize); return -EINVAL; } - if (sbi->s_blocks_per_group > blocksize * 8) { + if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { ext4_msg(sb, KERN_ERR, "#blocks per group too big: %lu", sbi->s_blocks_per_group); @@ -4386,7 +4386,7 @@ static int ext4_handle_clustersize(struct super_block *sb, int blocksize) sbi->s_clusters_per_group = sbi->s_blocks_per_group; sbi->s_cluster_bits = 0; } - sbi->s_cluster_ratio = clustersize / blocksize; + sbi->s_cluster_ratio = clustersize / sb->s_blocksize; /* Do we have standard group size of clustersize * 8 blocks ? */ if (sbi->s_blocks_per_group == clustersize << 3) @@ -4420,8 +4420,7 @@ static void ext4_fast_commit_init(struct super_block *sb) } static int ext4_inode_info_init(struct super_block *sb, - struct ext4_super_block *es, - int blocksize) + struct ext4_super_block *es) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -4438,11 +4437,11 @@ static int ext4_inode_info_init(struct super_block *sb, } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || - (sbi->s_inode_size > blocksize)) { + (sbi->s_inode_size > sb->s_blocksize)) { ext4_msg(sb, KERN_ERR, "unsupported inode size: %d", sbi->s_inode_size); - ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); + ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize); return -EINVAL; } /* @@ -5039,7 +5038,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_fsblk_t logical_sb_block; struct inode *root; int ret = -ENOMEM; - int blocksize; unsigned int i; int needs_recovery, has_huge_files; int err = 0; @@ -5062,7 +5060,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto out_fail; es = sbi->s_es; - blocksize = sb->s_blocksize; sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); err = ext4_init_metadata_csum(sb, es); @@ -5083,10 +5080,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - if (blocksize == PAGE_SIZE) + if (sb->s_blocksize == PAGE_SIZE) set_opt(sb, DIOREAD_NOLOCK); - if (ext4_inode_info_init(sb, es, blocksize)) + if (ext4_inode_info_init(sb, es)) goto failed_mount; err = parse_apply_sb_mount_options(sb, ctx); @@ -5116,7 +5113,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (ext4_check_feature_compatibility(sb, es, silent)) goto failed_mount; - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) { ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d", le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); @@ -5124,7 +5121,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) } if (sbi->s_daxdev) { - if (blocksize == PAGE_SIZE) + if (sb->s_blocksize == PAGE_SIZE) set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); else ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); @@ -5170,21 +5167,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); + sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) { if (!silent) ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || - sbi->s_inodes_per_group > blocksize * 8) { + sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", sbi->s_inodes_per_group); goto failed_mount; } sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; - sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); + sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb); sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); @@ -5210,7 +5207,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) } } - if (ext4_handle_clustersize(sb, blocksize)) + if (ext4_handle_clustersize(sb)) goto failed_mount; /* @@ -5343,7 +5340,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) } } - if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { + if (ext4_has_feature_verity(sb) && sb->s_blocksize != PAGE_SIZE) { ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); goto failed_mount_wq; } -- cgit v1.2.3 From 3df11e27f02c3dea5a3a0e45628d67f0d82c3c5e Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 16 Sep 2022 22:15:27 +0800 Subject: ext4: move DIOREAD_NOLOCK setting to ext4_set_def_opts() Now since all preparations is done, we can move the DIOREAD_NOLOCK setting to ext4_set_def_opts(). Suggested-by: Ritesh Harjani (IBM) Signed-off-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220916141527.1012715-17-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9db5be55de40..d733db8a0b02 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4335,6 +4335,9 @@ static void ext4_set_def_opts(struct super_block *sb, if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) set_opt(sb, DELALLOC); + + if (sb->s_blocksize == PAGE_SIZE) + set_opt(sb, DIOREAD_NOLOCK); } static int ext4_handle_clustersize(struct super_block *sb) @@ -5080,9 +5083,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - if (sb->s_blocksize == PAGE_SIZE) - set_opt(sb, DIOREAD_NOLOCK); - if (ext4_inode_info_init(sb, es)) goto failed_mount; -- cgit v1.2.3 From 78ed9354c57f31000e88783496fc84dce7c8021c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 18 Sep 2022 19:52:19 +0800 Subject: ext4: remove redundant checking in ext4_ioctl_checkpoint It is already checked after comment "check for invalid bits set", so let's remove this one. Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20220918115219.12407-1-guoqing.jiang@linux.dev Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ad3a294a88eb..4d49c5cfb690 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1063,9 +1063,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) if (!EXT4_SB(sb)->s_journal) return -ENODEV; - if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) - return -EINVAL; - if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev)) return -EOPNOTSUPP; -- cgit v1.2.3 From 9305721a309fa1bd7c194e0d4a2335bf3b29dca4 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 21 Sep 2022 14:40:38 +0800 Subject: ext4: fix potential memory leak in ext4_fc_record_modified_inode() As krealloc may return NULL, in this case 'state->fc_modified_inodes' may not be freed by krealloc, but 'state->fc_modified_inodes' already set NULL. Then will lead to 'state->fc_modified_inodes' memory leak. Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220921064040.3693255-2-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 9217a588afd1..9555ab816d7d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1486,13 +1486,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) if (state->fc_modified_inodes[i] == ino) return 0; if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { - state->fc_modified_inodes = krealloc( - state->fc_modified_inodes, + int *fc_modified_inodes; + + fc_modified_inodes = krealloc(state->fc_modified_inodes, sizeof(int) * (state->fc_modified_inodes_size + EXT4_FC_REPLAY_REALLOC_INCREMENT), GFP_KERNEL); - if (!state->fc_modified_inodes) + if (!fc_modified_inodes) return -ENOMEM; + state->fc_modified_inodes = fc_modified_inodes; state->fc_modified_inodes_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; } -- cgit v1.2.3 From 7069d105c1f15c442b68af43f7fde784f3126739 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 21 Sep 2022 14:40:39 +0800 Subject: ext4: fix potential memory leak in ext4_fc_record_regions() As krealloc may return NULL, in this case 'state->fc_regions' may not be freed by krealloc, but 'state->fc_regions' already set NULL. Then will lead to 'state->fc_regions' memory leak. Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220921064040.3693255-3-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 9555ab816d7d..5ab58cb4ce8d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1679,15 +1679,17 @@ int ext4_fc_record_regions(struct super_block *sb, int ino, if (replay && state->fc_regions_used != state->fc_regions_valid) state->fc_regions_used = state->fc_regions_valid; if (state->fc_regions_used == state->fc_regions_size) { + struct ext4_fc_alloc_region *fc_regions; + state->fc_regions_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; - state->fc_regions = krealloc( - state->fc_regions, - state->fc_regions_size * - sizeof(struct ext4_fc_alloc_region), - GFP_KERNEL); - if (!state->fc_regions) + fc_regions = krealloc(state->fc_regions, + state->fc_regions_size * + sizeof(struct ext4_fc_alloc_region), + GFP_KERNEL); + if (!fc_regions) return -ENOMEM; + state->fc_regions = fc_regions; } region = &state->fc_regions[state->fc_regions_used++]; region->ino = ino; -- cgit v1.2.3 From 27cd49780381c6ccbf248798e5e8fd076200ffba Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 21 Sep 2022 14:40:40 +0800 Subject: ext4: update 'state->fc_regions_size' after successful memory allocation To avoid to 'state->fc_regions_size' mismatch with 'state->fc_regions' when fail to reallocate 'fc_reqions',only update 'state->fc_regions_size' after 'state->fc_regions' is allocated successfully. Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220921064040.3693255-4-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 5ab58cb4ce8d..9549d89b3519 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1681,14 +1681,15 @@ int ext4_fc_record_regions(struct super_block *sb, int ino, if (state->fc_regions_used == state->fc_regions_size) { struct ext4_fc_alloc_region *fc_regions; - state->fc_regions_size += - EXT4_FC_REPLAY_REALLOC_INCREMENT; fc_regions = krealloc(state->fc_regions, - state->fc_regions_size * - sizeof(struct ext4_fc_alloc_region), + sizeof(struct ext4_fc_alloc_region) * + (state->fc_regions_size + + EXT4_FC_REPLAY_REALLOC_INCREMENT), GFP_KERNEL); if (!fc_regions) return -ENOMEM; + state->fc_regions_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; state->fc_regions = fc_regions; } region = &state->fc_regions[state->fc_regions_used++]; -- cgit v1.2.3 From b6a750c01925954d0dc6c360e91b5fcb53664a26 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 24 Sep 2022 10:12:10 +0800 Subject: ext4: remove unnecessary drop path references in mext_check_coverage() According to Jan Kara's suggestion: "The use in mext_check_coverage() can be actually removed - get_ext_path() -> ext4_find_extent() takes care of dropping the references." So remove unnecessary call ext4_ext_drop_refs() in mext_check_coverage(). Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220924021211.3831551-2-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/move_extent.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 2b83621d16e2..93ceee6d4caa 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -103,7 +103,6 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, if (unwritten != ext4_ext_is_unwritten(ext)) goto out; from += ext4_ext_get_actual_len(ext); - ext4_ext_drop_refs(path); } ret = 1; out: -- cgit v1.2.3 From 7ff5fddaddf2cc8d394f71e68648e9d8d7e41da8 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 24 Sep 2022 10:12:11 +0800 Subject: ext4: factor out ext4_free_ext_path() Factor out ext4_free_ext_path() to free extent path. As after previous patch 'ext4_ext_drop_refs()' is only used in 'extents.c', so make it static. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220924021211.3831551-3-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/extents.c | 107 +++++++++++++++++++---------------------------- fs/ext4/extents_status.c | 3 +- fs/ext4/fast_commit.c | 6 +-- fs/ext4/migrate.c | 3 +- fs/ext4/move_extent.c | 9 ++-- fs/ext4/verity.c | 6 +-- 7 files changed, 54 insertions(+), 82 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a9015cec2c39..9a3521e95f00 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3709,7 +3709,7 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *, extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, struct ext4_ext_path **, int flags); -extern void ext4_ext_drop_refs(struct ext4_ext_path *); +extern void ext4_free_ext_path(struct ext4_ext_path *); extern int ext4_ext_check_inode(struct inode *inode); extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5235974126bd..f1956288307f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -106,6 +106,25 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) return 0; } +static void ext4_ext_drop_refs(struct ext4_ext_path *path) +{ + int depth, i; + + if (!path) + return; + depth = path->p_depth; + for (i = 0; i <= depth; i++, path++) { + brelse(path->p_bh); + path->p_bh = NULL; + } +} + +void ext4_free_ext_path(struct ext4_ext_path *path) +{ + ext4_ext_drop_refs(path); + kfree(path); +} + /* * Make sure 'handle' has at least 'check_cred' credits. If not, restart * transaction with 'restart_cred' credits. The function drops i_data_sem @@ -636,8 +655,7 @@ int ext4_ext_precache(struct inode *inode) ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED); out: up_read(&ei->i_data_sem); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return ret; } @@ -724,19 +742,6 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, #define ext4_ext_show_move(inode, path, newblock, level) #endif -void ext4_ext_drop_refs(struct ext4_ext_path *path) -{ - int depth, i; - - if (!path) - return; - depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) { - brelse(path->p_bh); - path->p_bh = NULL; - } -} - /* * ext4_ext_binsearch_idx: * binary search for the closest index of the given block @@ -955,8 +960,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, return path; err: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); if (orig_path) *orig_path = NULL; return ERR_PTR(ret); @@ -2174,8 +2178,7 @@ merge: err = ext4_ext_dirty(handle, inode, path + path->p_depth); cleanup: - ext4_ext_drop_refs(npath); - kfree(npath); + ext4_free_ext_path(npath); return err; } @@ -3061,8 +3064,7 @@ again: } } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); path = NULL; if (err == -EAGAIN) goto again; @@ -4375,8 +4377,7 @@ got_allocated_blocks: allocated = map->m_len; ext4_ext_show_leaf(inode, path); out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); @@ -5245,8 +5246,7 @@ again: break; } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return ret; } @@ -5538,15 +5538,13 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) EXT4_GET_BLOCKS_METADATA_NOFAIL); } - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); if (ret < 0) { up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } } else { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); } ret = ext4_es_remove_extent(inode, offset_lblk, @@ -5766,10 +5764,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, count -= len; repeat: - ext4_ext_drop_refs(path1); - kfree(path1); - ext4_ext_drop_refs(path2); - kfree(path2); + ext4_free_ext_path(path1); + ext4_free_ext_path(path2); path1 = path2 = NULL; } return replaced_count; @@ -5848,8 +5844,7 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return err ? err : mapped; } @@ -5916,8 +5911,7 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]); up_write(&EXT4_I(inode)->i_data_sem); out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); ext4_mark_inode_dirty(NULL, inode); return ret; } @@ -5935,8 +5929,7 @@ void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end) return; ex = path[path->p_depth].p_ext; if (!ex) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); ext4_mark_inode_dirty(NULL, inode); return; } @@ -5949,8 +5942,7 @@ void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end) ext4_ext_dirty(NULL, inode, &path[path->p_depth]); up_write(&EXT4_I(inode)->i_data_sem); ext4_mark_inode_dirty(NULL, inode); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); } } @@ -5989,13 +5981,11 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); goto out; } end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); /* Count the number of data blocks */ cur = 0; @@ -6025,30 +6015,26 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) if (IS_ERR(path)) goto out; numblks += path->p_depth; - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); while (cur < end) { path = ext4_find_extent(inode, cur, NULL, 0); if (IS_ERR(path)) break; ex = path[path->p_depth].p_ext; if (!ex) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return 0; } cur = max(cur + 1, le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex)); ret = skip_hole(inode, &cur); if (ret < 0) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); break; } path2 = ext4_find_extent(inode, cur, NULL, 0); if (IS_ERR(path2)) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); break; } for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { @@ -6062,10 +6048,8 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) if (cmp1 != cmp2 && cmp2 != 0) numblks++; } - ext4_ext_drop_refs(path); - ext4_ext_drop_refs(path2); - kfree(path); - kfree(path2); + ext4_free_ext_path(path); + ext4_free_ext_path(path2); } out: @@ -6092,13 +6076,11 @@ int ext4_ext_clear_bb(struct inode *inode) return PTR_ERR(path); ex = path[path->p_depth].p_ext; if (!ex) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return 0; } end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); cur = 0; while (cur < end) { @@ -6117,8 +6099,7 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_fc_record_regions(inode->i_sb, inode->i_ino, 0, path[j].p_block, 1, 1); } - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); ext4_fc_record_regions(inode->i_sb, inode->i_ino, diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 23167efda95e..cd0a861853e3 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -667,8 +667,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, } } out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); } static void ext4_es_insert_extent_ind_check(struct inode *inode, diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 9549d89b3519..54ccc61c713a 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1770,8 +1770,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ret = ext4_ext_insert_extent( NULL, inode, &path, &newex, 0); up_write((&EXT4_I(inode)->i_data_sem)); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); if (ret) goto out; goto next; @@ -1926,8 +1925,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) for (j = 0; j < path->p_depth; j++) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 1); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); } cur += ret; ext4_mb_mark_bb(inode->i_sb, map.m_pblk, diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 54e7d3c95fd7..0a220ec9862d 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -56,8 +56,7 @@ static int finish_range(handle_t *handle, struct inode *inode, retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); err_out: up_write((&EXT4_I(inode)->i_data_sem)); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); lb->first_pblock = 0; return retval; } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 93ceee6d4caa..044e34cd835c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -32,8 +32,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, if (IS_ERR(path)) return PTR_ERR(path); if (path[ext_depth(inode)].p_ext == NULL) { - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); *ppath = NULL; return -ENODATA; } @@ -106,8 +105,7 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, } ret = 1; out: - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return ret; } @@ -691,8 +689,7 @@ out: ext4_discard_preallocations(donor_inode, 0); } - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); ext4_double_up_write_data_sem(orig_inode, donor_inode); unlock_two_nondirectories(orig_inode, donor_inode); diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index b051d19b5c8a..20cadfb740dc 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -298,16 +298,14 @@ static int ext4_get_verity_descriptor_location(struct inode *inode, last_extent = path[path->p_depth].p_ext; if (!last_extent) { EXT4_ERROR_INODE(inode, "verity file has no extents"); - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); return -EFSCORRUPTED; } end_lblk = le32_to_cpu(last_extent->ee_block) + ext4_ext_get_actual_len(last_extent); desc_size_pos = (u64)end_lblk << inode->i_blkbits; - ext4_ext_drop_refs(path); - kfree(path); + ext4_free_ext_path(path); if (desc_size_pos < sizeof(desc_size_disk)) goto bad; -- cgit v1.2.3 From fdc2a3c75dd8345c5b48718af90bad1a7811bedb Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 24 Sep 2022 15:52:31 +0800 Subject: ext4: introduce EXT4_FC_TAG_BASE_LEN helper Introduce EXT4_FC_TAG_BASE_LEN helper for calculate length of struct ext4_fc_tl. Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20220924075233.2315259-2-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 54 ++++++++++++++++++++++++++------------------------- fs/ext4/fast_commit.h | 3 +++ 2 files changed, 31 insertions(+), 26 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 54ccc61c713a..a65772c22f6d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -702,10 +702,10 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) * After allocating len, we should have space at least for a 0 byte * padding. */ - if (len + sizeof(struct ext4_fc_tl) > bsize) + if (len + EXT4_FC_TAG_BASE_LEN > bsize) return NULL; - if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { + if (bsize - off - 1 > len + EXT4_FC_TAG_BASE_LEN) { /* * Only allocate from current buffer if we have enough space for * this request AND we have space to add a zero byte padding. @@ -722,10 +722,10 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) /* Need to add PAD tag */ tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); - pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); + pad_len = bsize - off - 1 - EXT4_FC_TAG_BASE_LEN; tl->fc_len = cpu_to_le16(pad_len); if (crc) - *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); + *crc = ext4_chksum(sbi, *crc, tl, EXT4_FC_TAG_BASE_LEN); if (pad_len > 0) ext4_fc_memzero(sb, tl + 1, pad_len, crc); ext4_fc_submit_bh(sb, false); @@ -767,7 +767,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) * ext4_fc_reserve_space takes care of allocating an extra block if * there's no enough space on this block for accommodating this tail. */ - dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); + dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc); if (!dst) return -ENOSPC; @@ -777,8 +777,8 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); - ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); - dst += sizeof(tl); + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc); + dst += EXT4_FC_TAG_BASE_LEN; tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); dst += sizeof(tail.fc_tid); @@ -800,15 +800,15 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, struct ext4_fc_tl tl; u8 *dst; - dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); + dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc); if (!dst) return false; tl.fc_tag = cpu_to_le16(tag); tl.fc_len = cpu_to_le16(len); - ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); - ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); + ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc); return true; } @@ -820,8 +820,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, struct ext4_fc_dentry_info fcd; struct ext4_fc_tl tl; int dlen = fc_dentry->fcd_name.len; - u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, - crc); + u8 *dst = ext4_fc_reserve_space(sb, + EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); if (!dst) return false; @@ -830,8 +830,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); - ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); - dst += sizeof(tl); + ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); + dst += EXT4_FC_TAG_BASE_LEN; ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); dst += sizeof(fcd); ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); @@ -868,13 +868,13 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) ret = -ECANCELED; dst = ext4_fc_reserve_space(inode->i_sb, - sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); + EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc); if (!dst) goto err; - if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) + if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc)) goto err; - dst += sizeof(tl); + dst += EXT4_FC_TAG_BASE_LEN; if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) goto err; dst += sizeof(fc_inode); @@ -2026,9 +2026,10 @@ static int ext4_fc_replay_scan(journal_t *journal, } state->fc_replay_expected_off++; - for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { - memcpy(&tl, cur, sizeof(tl)); - val = cur + sizeof(tl); + for (cur = start; cur < end; + cur = cur + EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)) { + memcpy(&tl, cur, EXT4_FC_TAG_BASE_LEN); + val = cur + EXT4_FC_TAG_BASE_LEN; ext4_debug("Scan phase, tag:%s, blk %lld\n", tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); switch (le16_to_cpu(tl.fc_tag)) { @@ -2051,13 +2052,13 @@ static int ext4_fc_replay_scan(journal_t *journal, case EXT4_FC_TAG_PAD: state->fc_cur_tag++; state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, - sizeof(tl) + le16_to_cpu(tl.fc_len)); + EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)); break; case EXT4_FC_TAG_TAIL: state->fc_cur_tag++; memcpy(&tail, val, sizeof(tail)); state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, - sizeof(tl) + + EXT4_FC_TAG_BASE_LEN + offsetof(struct ext4_fc_tail, fc_crc)); if (le32_to_cpu(tail.fc_tid) == expected_tid && @@ -2084,7 +2085,7 @@ static int ext4_fc_replay_scan(journal_t *journal, } state->fc_cur_tag++; state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, - sizeof(tl) + le16_to_cpu(tl.fc_len)); + EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)); break; default: ret = state->fc_replay_num_tags ? @@ -2139,9 +2140,10 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, start = (u8 *)bh->b_data; end = (__u8 *)bh->b_data + journal->j_blocksize - 1; - for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { - memcpy(&tl, cur, sizeof(tl)); - val = cur + sizeof(tl); + for (cur = start; cur < end; + cur = cur + EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)) { + memcpy(&tl, cur, EXT4_FC_TAG_BASE_LEN); + val = cur + EXT4_FC_TAG_BASE_LEN; if (state->fc_replay_num_tags == 0) { ret = JBD2_FC_REPLAY_STOP; diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 1db12847a83b..a6154c3ed135 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -70,6 +70,9 @@ struct ext4_fc_tail { __le32 fc_crc; }; +/* Tag base length */ +#define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl)) + /* * Fast commit status codes */ -- cgit v1.2.3 From dcc5827484d6e53ccda12334f8bbfafcc593ceda Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 24 Sep 2022 15:52:32 +0800 Subject: ext4: factor out ext4_fc_get_tl() Factor out ext4_fc_get_tl() to fill 'tl' with host byte order. Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20220924075233.2315259-3-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index a65772c22f6d..54622005a0c8 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1338,7 +1338,7 @@ struct dentry_info_args { }; static inline void tl_to_darg(struct dentry_info_args *darg, - struct ext4_fc_tl *tl, u8 *val) + struct ext4_fc_tl *tl, u8 *val) { struct ext4_fc_dentry_info fcd; @@ -1347,8 +1347,14 @@ static inline void tl_to_darg(struct dentry_info_args *darg, darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); darg->ino = le32_to_cpu(fcd.fc_ino); darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); - darg->dname_len = le16_to_cpu(tl->fc_len) - - sizeof(struct ext4_fc_dentry_info); + darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); +} + +static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val) +{ + memcpy(tl, val, EXT4_FC_TAG_BASE_LEN); + tl->fc_len = le16_to_cpu(tl->fc_len); + tl->fc_tag = le16_to_cpu(tl->fc_tag); } /* Unlink replay function */ @@ -1513,7 +1519,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, struct ext4_inode *raw_fc_inode; struct inode *inode = NULL; struct ext4_iloc iloc; - int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); + int inode_len, ino, ret, tag = tl->fc_tag; struct ext4_extent_header *eh; memcpy(&fc_inode, val, sizeof(fc_inode)); @@ -1538,7 +1544,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, if (ret) goto out; - inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode); + inode_len = tl->fc_len - sizeof(struct ext4_fc_inode); raw_inode = ext4_raw_inode(&iloc); memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); @@ -2027,12 +2033,12 @@ static int ext4_fc_replay_scan(journal_t *journal, state->fc_replay_expected_off++; for (cur = start; cur < end; - cur = cur + EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)) { - memcpy(&tl, cur, EXT4_FC_TAG_BASE_LEN); + cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { + ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; ext4_debug("Scan phase, tag:%s, blk %lld\n", - tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); - switch (le16_to_cpu(tl.fc_tag)) { + tag2str(tl.fc_tag), bh->b_blocknr); + switch (tl.fc_tag) { case EXT4_FC_TAG_ADD_RANGE: memcpy(&ext, val, sizeof(ext)); ex = (struct ext4_extent *)&ext.fc_ex; @@ -2052,7 +2058,7 @@ static int ext4_fc_replay_scan(journal_t *journal, case EXT4_FC_TAG_PAD: state->fc_cur_tag++; state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, - EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)); + EXT4_FC_TAG_BASE_LEN + tl.fc_len); break; case EXT4_FC_TAG_TAIL: state->fc_cur_tag++; @@ -2085,7 +2091,7 @@ static int ext4_fc_replay_scan(journal_t *journal, } state->fc_cur_tag++; state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, - EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)); + EXT4_FC_TAG_BASE_LEN + tl.fc_len); break; default: ret = state->fc_replay_num_tags ? @@ -2141,8 +2147,8 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, end = (__u8 *)bh->b_data + journal->j_blocksize - 1; for (cur = start; cur < end; - cur = cur + EXT4_FC_TAG_BASE_LEN + le16_to_cpu(tl.fc_len)) { - memcpy(&tl, cur, EXT4_FC_TAG_BASE_LEN); + cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { + ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; if (state->fc_replay_num_tags == 0) { @@ -2150,10 +2156,9 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, ext4_fc_set_bitmaps_and_counters(sb); break; } - ext4_debug("Replay phase, tag:%s\n", - tag2str(le16_to_cpu(tl.fc_tag))); + ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); state->fc_replay_num_tags--; - switch (le16_to_cpu(tl.fc_tag)) { + switch (tl.fc_tag) { case EXT4_FC_TAG_LINK: ret = ext4_fc_replay_link(sb, &tl, val); break; @@ -2174,19 +2179,18 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, break; case EXT4_FC_TAG_PAD: trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, - le16_to_cpu(tl.fc_len), 0); + tl.fc_len, 0); break; case EXT4_FC_TAG_TAIL: - trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, - le16_to_cpu(tl.fc_len), 0); + trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, + 0, tl.fc_len, 0); memcpy(&tail, val, sizeof(tail)); WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); break; case EXT4_FC_TAG_HEAD: break; default: - trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0, - le16_to_cpu(tl.fc_len), 0); + trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0); ret = -ECANCELED; break; } -- cgit v1.2.3 From 1b45cc5c7b920fd8bf72e5a888ec7abeadf41e09 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 24 Sep 2022 15:52:33 +0800 Subject: ext4: fix potential out of bound read in ext4_fc_replay_scan() For scan loop must ensure that at least EXT4_FC_TAG_BASE_LEN space. If remain space less than EXT4_FC_TAG_BASE_LEN which will lead to out of bound read when mounting corrupt file system image. ADD_RANGE/HEAD/TAIL is needed to add extra check when do journal scan, as this three tags will read data during scan, tag length couldn't less than data length which will read. Cc: stable@kernel.org Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20220924075233.2315259-4-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 54622005a0c8..ef05bfa87798 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1976,6 +1976,34 @@ void ext4_fc_replay_cleanup(struct super_block *sb) kfree(sbi->s_fc_replay_state.fc_modified_inodes); } +static inline bool ext4_fc_tag_len_isvalid(struct ext4_fc_tl *tl, + u8 *val, u8 *end) +{ + if (val + tl->fc_len > end) + return false; + + /* Here only check ADD_RANGE/TAIL/HEAD which will read data when do + * journal rescan before do CRC check. Other tags length check will + * rely on CRC check. + */ + switch (tl->fc_tag) { + case EXT4_FC_TAG_ADD_RANGE: + return (sizeof(struct ext4_fc_add_range) == tl->fc_len); + case EXT4_FC_TAG_TAIL: + return (sizeof(struct ext4_fc_tail) <= tl->fc_len); + case EXT4_FC_TAG_HEAD: + return (sizeof(struct ext4_fc_head) == tl->fc_len); + case EXT4_FC_TAG_DEL_RANGE: + case EXT4_FC_TAG_LINK: + case EXT4_FC_TAG_UNLINK: + case EXT4_FC_TAG_CREAT: + case EXT4_FC_TAG_INODE: + case EXT4_FC_TAG_PAD: + default: + return true; + } +} + /* * Recovery Scan phase handler * @@ -2032,10 +2060,15 @@ static int ext4_fc_replay_scan(journal_t *journal, } state->fc_replay_expected_off++; - for (cur = start; cur < end; + for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN; cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; + if (!ext4_fc_tag_len_isvalid(&tl, val, end)) { + ret = state->fc_replay_num_tags ? + JBD2_FC_REPLAY_STOP : -ECANCELED; + goto out_err; + } ext4_debug("Scan phase, tag:%s, blk %lld\n", tag2str(tl.fc_tag), bh->b_blocknr); switch (tl.fc_tag) { @@ -2146,7 +2179,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, start = (u8 *)bh->b_data; end = (__u8 *)bh->b_data + journal->j_blocksize - 1; - for (cur = start; cur < end; + for (cur = start; cur < end - EXT4_FC_TAG_BASE_LEN; cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ext4_fc_get_tl(&tl, cur); val = cur + EXT4_FC_TAG_BASE_LEN; @@ -2156,6 +2189,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, ext4_fc_set_bitmaps_and_counters(sb); break; } + ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); state->fc_replay_num_tags--; switch (tl.fc_tag) { -- cgit v1.2.3