From 8cef107a1d890ff76c85d665083ff3562be01d32 Mon Sep 17 00:00:00 2001 From: Frans van de Wiel Date: Mon, 15 Mar 2010 19:29:34 +0100 Subject: ext3: Avoid loading bitmaps for full groups during block allocation There is no point in loading the bitmap for groups which are completely full. This causes noticeable performance problems (and memory pressure) on small systems with a large, full filesystem (http://marc.info/?l=linux-ext4&m=126843108314310&w=2). Jan Kara: Added a comment and changed the check to use the cpu-endian value. Signed-off-by: "Frans van de Wiel" Signed-off-by: Jan Kara --- fs/ext3/balloc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/ext3') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a177122a1b25..4a32511f4ded 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1583,6 +1583,12 @@ retry_alloc: if (!gdp) goto io_error; free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * skip this group (and avoid loading bitmap) if there + * are no free blocks + */ + if (!free_blocks) + continue; /* * skip this group if the number of * free blocks is less than half of the reservation -- cgit v1.2.3 From 41d1a636b813867339db52e12377ca132d54700f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 12 Apr 2010 23:46:00 +0400 Subject: ext3: init statistics after journal recovery v2 Currently block/inode/dir counters are initialized before the journal is recovered. In fact, after journal recovery this info will probably change, which results in incorrect numbers returned from statfs(2). 
BUG:#15768 Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext3/super.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 1bee604cc6cd..6b6e49de0916 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1890,21 +1890,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); - if (!err) { - err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - } - if (err) { - ext3_msg(sb, KERN_ERR, "error: insufficient memory"); - goto failed_mount3; - } - /* per fileystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); sbi->s_rsv_window_root = RB_ROOT; @@ -1945,15 +1930,29 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (!test_opt(sb, NOLOAD) && EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount3; + goto failed_mount2; } else if (journal_inum) { if (ext3_create_journal(sb, es, journal_inum)) - goto failed_mount3; + goto failed_mount2; } else { if (!silent) ext3_msg(sb, KERN_ERR, "error: no journal found. 
" "mounting ext3 over ext2?"); + goto failed_mount2; + } + err = percpu_counter_init(&sbi->s_freeblocks_counter, + ext3_count_free_blocks(sb)); + if (!err) { + err = percpu_counter_init(&sbi->s_freeinodes_counter, + ext3_count_free_inodes(sb)); + } + if (!err) { + err = percpu_counter_init(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + } + if (err) { + ext3_msg(sb, KERN_ERR, "error: insufficient memory"); goto failed_mount3; } @@ -1978,7 +1977,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) ext3_msg(sb, KERN_ERR, "error: journal does not support " "requested data journaling mode"); - goto failed_mount4; + goto failed_mount3; } default: break; @@ -2001,19 +2000,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (IS_ERR(root)) { ext3_msg(sb, KERN_ERR, "error: get root inode failed"); ret = PTR_ERR(root); - goto failed_mount4; + goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); - goto failed_mount4; + goto failed_mount3; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); iput(root); ret = -ENOMEM; - goto failed_mount4; + goto failed_mount3; } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -2039,12 +2038,11 @@ cantfind_ext3: sb->s_id); goto failed_mount; -failed_mount4: - journal_destroy(sbi->s_journal); failed_mount3: percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); + journal_destroy(sbi->s_journal); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); -- cgit v1.2.3 From 5277970878a32e437b27296e34c592e5d351f11d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Apr 2010 22:24:26 +0200 Subject: ext3: Fix waiting on transaction during fsync log_start_commit() returns 1 only when it started a 
transaction commit. Thus in case transaction commit is already running, we fail to wait for the commit to finish. Fix the issue by always waiting for the commit regardless of the log_start_commit return value. Signed-off-by: Jan Kara --- fs/ext3/fsync.c | 20 +++++++++----------- fs/jbd/journal.c | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 8209f266e9ad..26289e8f4163 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -48,7 +48,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; struct ext3_inode_info *ei = EXT3_I(inode); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; - int ret = 0; + int ret, needs_barrier = 0; tid_t commit_tid; if (inode->i_sb->s_flags & MS_RDONLY) @@ -70,28 +70,26 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. 
*/ - if (ext3_should_journal_data(inode)) { - ret = ext3_force_commit(inode->i_sb); - goto out; - } + if (ext3_should_journal_data(inode)) + return ext3_force_commit(inode->i_sb); if (datasync) commit_tid = atomic_read(&ei->i_datasync_tid); else commit_tid = atomic_read(&ei->i_sync_tid); - if (log_start_commit(journal, commit_tid)) { - log_wait_commit(journal, commit_tid); - goto out; - } + if (test_opt(inode->i_sb, BARRIER) && + !journal_trans_will_send_data_barrier(journal, commit_tid)) + needs_barrier = 1; + log_start_commit(journal, commit_tid); + ret = log_wait_commit(journal, commit_tid); /* * In case we didn't commit a transaction, we have to flush * disk caches manually so that data really is on persistent * storage */ - if (test_opt(inode->i_sb, BARRIER)) + if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); -out: return ret; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 99c71940155a..93d1e47647bd 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -594,7 +594,7 @@ out: spin_unlock(&journal->j_state_lock); return ret; } -EXPORT_SYMBOL(journal_commit_will_send_barrier); +EXPORT_SYMBOL(journal_trans_will_send_data_barrier); /* * Log buffer allocation routines: -- cgit v1.2.3 From 0636c73ee7b129f77f577aaaefc8dde057be6d18 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 30 Apr 2010 11:09:34 -0500 Subject: ext3: make barrier options consistent with ext4 ext4 was updated to accept barrier/nobarrier mount options in addition to the older barrier=0/1. The barrier story is complex enough, we should help people by making the options the same at least, even if the defaults are different. This patch allows the barrier/nobarrier mount options for ext3, while keeping nobarrier the default. It also unconditionally displays barrier status in show_options, and prints a message at mount time if barriers are not enabled, just as ext4 does. 
Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- Documentation/filesystems/ext3.txt | 15 +++++++++++++-- fs/ext3/super.c | 33 ++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'fs/ext3') diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 867c5b50cb42..272f80d5f966 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -59,8 +59,19 @@ commit=nrsec (*) Ext3 can be told to sync all its data and metadata Setting it to very large values will improve performance. -barrier=1 This enables/disables barriers. barrier=0 disables - it, barrier=1 enables it. +barrier=<0(*)|1> This enables/disables the use of write barriers in +barrier the jbd code. barrier=0 disables, barrier=1 enables. +nobarrier (*) This also requires an IO stack which can support + barriers, and if jbd gets an error on a barrier + write, it will disable again with a warning. + Write barriers enforce proper on-disk ordering + of journal commits, making volatile disk write caches + safe to use, at some performance penalty. If + your disks are battery-backed in one way or another, + disabling barriers may safely improve performance. + The mount options "barrier" and "nobarrier" can + also be used to enable or disable barriers, for + consistency with other ext3 mount options. orlov (*) This enables the new Orlov block allocator. It is enabled by default. diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6b6e49de0916..0fc1293d0e96 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -653,8 +653,12 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } - if (test_opt(sb, BARRIER)) - seq_puts(seq, ",barrier=1"); + + /* + * Always display barrier state so it's clear what the status is. + */ + seq_puts(seq, ",barrier="); + seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); @@ -810,8 +814,8 @@ enum { Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, - Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_usrquota, Opt_grpquota + Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, + Opt_resize, Opt_usrquota, Opt_grpquota }; static const match_table_t tokens = { @@ -865,6 +869,8 @@ static const match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_resize, "resize"}, {Opt_err, NULL}, }; @@ -967,7 +973,11 @@ static int parse_options (char *options, struct super_block *sb, int token; if (!*p) continue; - + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = 0; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: @@ -1215,9 +1225,15 @@ set_qf_format: case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); break; + case Opt_nobarrier: + clear_opt(sbi->s_mount_opt, BARRIER); + break; case Opt_barrier: - if (match_int(&args[0], &option)) - return 0; + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) set_opt(sbi->s_mount_opt, BARRIER); else @@ -2276,6 +2292,9 @@ static int ext3_load_journal(struct super_block *sb, return -EINVAL; } + if (!(journal->j_flags & JFS_BARRIER)) + printk(KERN_INFO "EXT3-fs: barriers not enabled\n"); + if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = journal_update_format(journal); if (err) { -- cgit v1.2.3 From 12755627bdcddcdb30a1bfb9a09395a52b1d6838 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 8 Apr 2010 22:04:20 +0400 Subject: quota: unify quota init condition in setattr Quota must being 
initialized if size or uid/gid changes are requested. But initialization is performed in two different places: in case of i_size the file system is responsible for dquot init, but in case of uid/gid init will be called internally in dquot_transfer(). This ambiguity makes the code harder to understand. Let's move this logic to one common helper function. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/inode.c | 2 +- fs/jfs/file.c | 2 +- fs/ocfs2/file.c | 4 ++-- fs/quota/dquot.c | 5 ++--- fs/reiserfs/inode.c | 3 ++- fs/udf/file.c | 2 +- fs/ufs/truncate.c | 8 ++++---- include/linux/quotaops.h | 8 ++++++++ 10 files changed, 23 insertions(+), 15 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b90c3bf6e9ba..527c46d9bc1f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1466,7 +1466,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; - if (iattr->ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, iattr)) dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ea33bdf0a300..735f0190ec2a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3151,7 +3151,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, attr)) dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 81d605412844..3e0f6af9d08d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5425,7 +5425,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, attr)) dquot_initialize(inode); if 
((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 14ba982b3f24..85d9ec659225 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -98,7 +98,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) if (rc) return rc; - if (iattr->ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, iattr)) dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f74f1400eccd..e127c53ec2e7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -966,10 +966,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (status) return status; + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { - dquot_initialize(inode); - status = ocfs2_rw_lock(inode, 1); if (status < 0) { mlog_errno(status); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index b1a5036560a9..1056a21f0300 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1822,10 +1822,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) mask |= 1 << GRPQUOTA; chid[GRPQUOTA] = iattr->ia_gid; } - if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { - dquot_initialize(inode); + if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) return __dquot_transfer(inode, chid, mask); - } + return 0; } EXPORT_SYMBOL(dquot_transfer); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index dc2c65e04853..0f22fdaf54ac 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3076,9 +3076,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); depth = reiserfs_write_lock_once(inode->i_sb); - if (attr->ia_valid & ATTR_SIZE) { + if 
(is_quota_modification(inode, attr)) dquot_initialize(inode); + if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate */ diff --git a/fs/udf/file.c b/fs/udf/file.c index 4b6a46ccbf46..6ebc043f3a2a 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -227,7 +227,7 @@ int udf_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; - if (iattr->ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, iattr)) dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index ee8db3e77bfe..f294c44577dc 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -518,18 +518,18 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { error = dquot_transfer(inode, attr); if (error) return error; } - if (ia_valid & ATTR_SIZE && - attr->ia_size != i_size_read(inode)) { + if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { loff_t old_i_size = inode->i_size; - dquot_initialize(inode); - error = vmtruncate(inode, attr->ia_size); if (error) return error; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 82c70c42d035..8a7818764a67 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -14,6 +14,14 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) return &sb->s_dquot; } +/* i_mutex must being held */ +static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) +{ + return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) || + (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || + (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid); +} + #if defined(CONFIG_QUOTA) /* -- cgit v1.2.3