diff options
author | Linus Torvalds | 2012-12-12 09:19:45 -0800 |
---|---|---|
committer | Linus Torvalds | 2012-12-12 09:19:45 -0800 |
commit | 3f1c64f410e4394ecefadd7a597a7c20368a65fc (patch) | |
tree | 10f15d6a222b15a34831f2d7d1e3ac26f1436638 /fs/xfs/xfs_super.c | |
parent | 22a40fd9a60388aec8106b0baffc8f59f83bb1b4 (diff) | |
parent | f9668a09e32ac6d2aa22f44cc310e430a8f4a40f (diff) |
Merge tag 'for-linus-v3.8-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Ben Myers:
"There is plenty going on, including the cleanup of xfssyncd, metadata
verifiers, CRC infrastructure for the log, tracking of inodes with
speculative allocation, a cleanup of xfs_fs_subr.c, fixes for
XFS_IOC_ZERO_RANGE, an important fix related to log replay (only
update the last_sync_lsn when a transaction completes), a fix for
deadlock on AGF buffers, documentation and comment updates, and a few
more cleanups and fixes.
Details:
- remove the xfssyncd mess
- only update the last_sync_lsn when a transaction completes
- zero allocation_args on the kernel stack
- fix AGF/alloc workqueue deadlock
- silence uninitialised f.file warning
- Update inode alloc comments
- Update mount options documentation
- report projid32bit feature in geometry call
- speculative preallocation inode tracking
- fix attr tree double split corruption
- fix broken error handling in xfs_vm_writepage
- drop buffer io reference when a bad bio is built
- add more attribute tree trace points
- growfs infrastructure changes for 3.8
- fs/xfs/xfs_fs_subr.c die die die
- add CRC infrastructure
- add CRC checks to the log
- Remove description of nodelaylog mount option from xfs.txt
- inode allocation should use unmapped buffers
- byte range granularity for XFS_IOC_ZERO_RANGE
- fix direct IO nested transaction deadlock
- fix stray dquot unlock when reclaiming dquots
- fix sparse reported log CRC endian issue"
Fix up trivial conflict in fs/xfs/xfs_fsops.c due to the same patch
having been applied twice (commits eaef854335ce and 1375cb65e87b: "xfs:
growfs: don't read garbage for new secondary superblocks") with later
updates to the affected code in the XFS tree.
* tag 'for-linus-v3.8-rc1' of git://oss.sgi.com/xfs/xfs: (78 commits)
xfs: fix sparse reported log CRC endian issue
xfs: fix stray dquot unlock when reclaiming dquots
xfs: fix direct IO nested transaction deadlock.
xfs: byte range granularity for XFS_IOC_ZERO_RANGE
xfs: inode allocation should use unmapped buffers.
xfs: Remove the description of nodelaylog mount option from xfs.txt
xfs: add CRC checks to the log
xfs: add CRC infrastructure
xfs: convert buffer verifiers to an ops structure.
xfs: connect up write verifiers to new buffers
xfs: add pre-write metadata buffer verifier callbacks
xfs: add buffer pre-write callback
xfs: Add verifiers to dir2 data readahead.
xfs: add xfs_da_node verification
xfs: factor and verify attr leaf reads
xfs: factor dir2 leaf read
xfs: factor out dir2 data block reading
xfs: factor dir2 free block reading
xfs: verify dir2 block format buffers
xfs: factor dir2 block read operations
...
Diffstat (limited to 'fs/xfs/xfs_super.c')
-rw-r--r-- | fs/xfs/xfs_super.c | 148 |
1 files changed, 102 insertions, 46 deletions
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 26a09bd7f975..ab8839b26272 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -49,7 +49,7 @@ #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" -#include "xfs_sync.h" +#include "xfs_icache.h" #include "xfs_trace.h" #include <linux/namei.h> @@ -863,8 +863,30 @@ xfs_init_mount_workqueues( WQ_MEM_RECLAIM, 0, mp->m_fsname); if (!mp->m_cil_workqueue) goto out_destroy_unwritten; + + mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", + WQ_NON_REENTRANT, 0, mp->m_fsname); + if (!mp->m_reclaim_workqueue) + goto out_destroy_cil; + + mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", + WQ_NON_REENTRANT, 0, mp->m_fsname); + if (!mp->m_log_workqueue) + goto out_destroy_reclaim; + + mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", + WQ_NON_REENTRANT, 0, mp->m_fsname); + if (!mp->m_eofblocks_workqueue) + goto out_destroy_log; + return 0; +out_destroy_log: + destroy_workqueue(mp->m_log_workqueue); +out_destroy_reclaim: + destroy_workqueue(mp->m_reclaim_workqueue); +out_destroy_cil: + destroy_workqueue(mp->m_cil_workqueue); out_destroy_unwritten: destroy_workqueue(mp->m_unwritten_workqueue); out_destroy_data_iodone_queue: @@ -877,11 +899,32 @@ STATIC void xfs_destroy_mount_workqueues( struct xfs_mount *mp) { + destroy_workqueue(mp->m_eofblocks_workqueue); + destroy_workqueue(mp->m_log_workqueue); + destroy_workqueue(mp->m_reclaim_workqueue); destroy_workqueue(mp->m_cil_workqueue); destroy_workqueue(mp->m_data_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); } +/* + * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK + * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting + * for IO to complete so that we effectively throttle multiple callers to the + * rate at which IO is completing. 
+ */ +void +xfs_flush_inodes( + struct xfs_mount *mp) +{ + struct super_block *sb = mp->m_super; + + if (down_read_trylock(&sb->s_umount)) { + sync_inodes_sb(sb); + up_read(&sb->s_umount); + } +} + /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( @@ -1006,9 +1049,8 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); xfs_filestream_unmount(mp); - cancel_delayed_work_sync(&mp->m_sync_work); xfs_unmountfs(mp); - xfs_syncd_stop(mp); + xfs_freesb(mp); xfs_icsb_destroy_counters(mp); xfs_destroy_mount_workqueues(mp); @@ -1023,7 +1065,6 @@ xfs_fs_sync_fs( int wait) { struct xfs_mount *mp = XFS_M(sb); - int error; /* * Doing anything during the async pass would be counterproductive. @@ -1031,17 +1072,14 @@ xfs_fs_sync_fs( if (!wait) return 0; - error = xfs_quiesce_data(mp); - if (error) - return -error; - + xfs_log_force(mp, XFS_LOG_SYNC); if (laptop_mode) { /* * The disk must be active because we're syncing. - * We schedule xfssyncd now (now that the disk is + * We schedule log work now (now that the disk is * active) instead of later (when it might not be). */ - flush_delayed_work(&mp->m_sync_work); + flush_delayed_work(&mp->m_log->l_work); } return 0; @@ -1118,6 +1156,48 @@ xfs_restore_resvblks(struct xfs_mount *mp) xfs_reserve_blocks(mp, &resblks, NULL); } +/* + * Trigger writeback of all the dirty metadata in the file system. + * + * This ensures that the metadata is written to their location on disk rather + * than just existing in transactions in the log. This means after a quiesce + * there is no log replay required to write the inodes to disk - this is the + * primary difference between a sync and a quiesce. + * + * Note: xfs_log_quiesce() stops background log work - the callers must ensure + * it is started again when appropriate. 
+ */ +void +xfs_quiesce_attr( + struct xfs_mount *mp) +{ + int error = 0; + + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* force the log to unpin objects from the now complete transactions */ + xfs_log_force(mp, XFS_LOG_SYNC); + + /* reclaim inodes to do any IO before the freeze completes */ + xfs_reclaim_inodes(mp, 0); + xfs_reclaim_inodes(mp, SYNC_WAIT); + + /* Push the superblock and write an unmount record */ + error = xfs_log_sbcount(mp); + if (error) + xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " + "Frozen image may not be consistent."); + /* + * Just warn here till VFS can correctly support + * read-only remount without racing. + */ + WARN_ON(atomic_read(&mp->m_active_trans) != 0); + + xfs_log_quiesce(mp); +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1198,20 +1278,18 @@ xfs_fs_remount( * value if it is non-zero, otherwise go with the default. */ xfs_restore_resvblks(mp); + xfs_log_work_queue(mp); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { /* - * After we have synced the data but before we sync the - * metadata, we need to free up the reserve block pool so that - * the used block count in the superblock on disk is correct at - * the end of the remount. Stash the current reserve pool size - * so that if we get remounted rw, we can return it to the same - * size. + * Before we sync the metadata, we need to free up the reserve + * block pool so that the used block count in the superblock on + * disk is correct at the end of the remount. Stash the current + * reserve pool size so that if we get remounted rw, we can + * return it to the same size. 
*/ - - xfs_quiesce_data(mp); xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; @@ -1243,6 +1321,7 @@ xfs_fs_unfreeze( struct xfs_mount *mp = XFS_M(sb); xfs_restore_resvblks(mp); + xfs_log_work_queue(mp); return 0; } @@ -1321,6 +1400,8 @@ xfs_fs_fill_super( spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); mp->m_super = sb; sb->s_fs_info = mp; @@ -1371,10 +1452,6 @@ xfs_fs_fill_super( /* * we must configure the block size in the superblock before we run the * full mount process as the mount process can lookup and cache inodes. - * For the same reason we must also initialise the syncd and register - * the inode cache shrinker so that inodes can be reclaimed during - * operations like a quotacheck that iterate all inodes in the - * filesystem. */ sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; @@ -1384,13 +1461,9 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - error = xfs_syncd_init(mp); - if (error) - goto out_filestream_unmount; - error = xfs_mountfs(mp); if (error) - goto out_syncd_stop; + goto out_filestream_unmount; root = igrab(VFS_I(mp->m_rootip)); if (!root) { @@ -1408,8 +1481,7 @@ xfs_fs_fill_super( } return 0; - out_syncd_stop: - xfs_syncd_stop(mp); + out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: @@ -1429,7 +1501,6 @@ out_destroy_workqueues: out_unmount: xfs_filestream_unmount(mp); xfs_unmountfs(mp); - xfs_syncd_stop(mp); goto out_free_sb; } @@ -1625,16 +1696,6 @@ STATIC int __init xfs_init_workqueues(void) { /* - * We never want to the same work item to run twice, reclaiming inodes - * or idling the log is not going to get any faster by multiple CPUs - * competing for ressources. Use the default large max_active value - * so that even lots of filesystems can perform these task in parallel. 
- */ - xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); - if (!xfs_syncd_wq) - return -ENOMEM; - - /* * The allocation workqueue can be used in memory reclaim situations * (writepage path), and parallelism is only limited by the number of * AGs in all the filesystems mounted. Hence use the default large @@ -1642,20 +1703,15 @@ xfs_init_workqueues(void) */ xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); if (!xfs_alloc_wq) - goto out_destroy_syncd; + return -ENOMEM; return 0; - -out_destroy_syncd: - destroy_workqueue(xfs_syncd_wq); - return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { destroy_workqueue(xfs_alloc_wq); - destroy_workqueue(xfs_syncd_wq); } STATIC int __init |