From 9bf729c0af67897ea8498ce17c29b0683f7f2028 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 29 Apr 2010 09:55:50 +1000 Subject: xfs: add a shrinker to background inode reclaim On low memory boxes or those with highmem, kernel can OOM before the background reclaims inodes via xfssyncd. Add a shrinker to run inode reclaim so that it inode reclaim is expedited when memory is low. This is more complex than it needs to be because the VM folk don't want a context added to the shrinker infrastructure. Hence we need to add a global list of XFS mount structures so the shrinker can traverse them. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 5 ++ fs/xfs/linux-2.6/xfs_sync.c | 112 ++++++++++++++++++++++++++++++++++++++++--- fs/xfs/linux-2.6/xfs_sync.h | 7 ++- 3 files changed, 116 insertions(+), 8 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 52e06b487ced..29f1edca76de 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1209,6 +1209,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); xfs_dmops_put(mp); @@ -1622,6 +1623,8 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; + xfs_inode_shrinker_register(mp); + kfree(mtpt); return 0; @@ -1867,6 +1870,7 @@ init_xfs_fs(void) goto out_cleanup_procfs; vfs_initquota(); + xfs_inode_shrinker_init(); error = register_filesystem(&xfs_fs_type); if (error) @@ -1894,6 +1898,7 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); + xfs_inode_shrinker_destroy(); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index fd9698215759..a427c638d909 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -95,7 +95,8 @@ xfs_inode_ag_walk( struct xfs_perag *pag, int flags), int flags, int tag, - int exclusive) + int exclusive, + int *nr_to_scan) { uint32_t first_index; int last_error = 0; @@ -134,7 +135,7 @@ restart: if (error == EFSCORRUPTED) break; - } while (1); + } while ((*nr_to_scan)--); if (skipped) { delay(1); @@ -150,12 +151,15 @@ xfs_inode_ag_iterator( struct xfs_perag *pag, int flags), int flags, int tag, - int exclusive) + int exclusive, + int *nr_to_scan) { int error = 0; int last_error = 0; xfs_agnumber_t ag; + int nr; + nr = nr_to_scan ? *nr_to_scan : INT_MAX; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { struct xfs_perag *pag; @@ -165,14 +169,18 @@ xfs_inode_ag_iterator( continue; } error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, - exclusive); + exclusive, &nr); xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) break; } + if (nr <= 0) + break; } + if (nr_to_scan) + *nr_to_scan = nr; return XFS_ERROR(last_error); } @@ -291,7 +299,7 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG, 0); + XFS_ICI_NO_TAG, 0, NULL); if (error) return XFS_ERROR(error); @@ -310,7 +318,7 @@ xfs_sync_attr( ASSERT((flags & ~SYNC_WAIT) == 0); return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG, 0); + XFS_ICI_NO_TAG, 0, NULL); } STATIC int @@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag( radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable++; } /* @@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag( { radix_tree_tag_clear(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable--; } /* @@ -854,5 +864,93 @@ xfs_reclaim_inodes( int mode) { return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, - XFS_ICI_RECLAIM_TAG, 1); + XFS_ICI_RECLAIM_TAG, 1, NULL); +} + +/* + * Shrinker infrastructure. + * + * This is all far more complex than it needs to be. It adds a global list of + * mounts because the shrinkers can only call a global context. We need to make + * the shrinkers pass a context to avoid the need for global state. + */ +static LIST_HEAD(xfs_mount_list); +static struct rw_semaphore xfs_mount_list_lock; + +static int +xfs_reclaim_inode_shrink( + int nr_to_scan, + gfp_t gfp_mask) +{ + struct xfs_mount *mp; + struct xfs_perag *pag; + xfs_agnumber_t ag; + int reclaimable = 0; + + if (nr_to_scan) { + if (!(gfp_mask & __GFP_FS)) + return -1; + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, + XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); + if (nr_to_scan <= 0) + break; + } + up_read(&xfs_mount_list_lock); + } + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); + continue; + } + reclaimable += pag->pag_ici_reclaimable; + xfs_perag_put(pag); + } + } + up_read(&xfs_mount_list_lock); + return reclaimable; +} + +static struct shrinker xfs_inode_shrinker = { + .shrink = xfs_reclaim_inode_shrink, + .seeks = DEFAULT_SEEKS, +}; + +void __init +xfs_inode_shrinker_init(void) +{ + init_rwsem(&xfs_mount_list_lock); + register_shrinker(&xfs_inode_shrinker); +} + +void +xfs_inode_shrinker_destroy(void) +{ + ASSERT(list_empty(&xfs_mount_list)); + unregister_shrinker(&xfs_inode_shrinker); +} + +void +xfs_inode_shrinker_register( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_add_tail(&mp->m_mplist, &xfs_mount_list); + up_write(&xfs_mount_list_lock); +} + +void +xfs_inode_shrinker_unregister( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_del(&mp->m_mplist); + up_write(&xfs_mount_list_lock); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index d480c346cabb..cdcbaaca9880 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag, int write_lock); + int flags, int tag, int write_lock, int *nr_to_scan); + +void xfs_inode_shrinker_init(void); +void xfs_inode_shrinker_destroy(void); +void xfs_inode_shrinker_register(struct xfs_mount *mp); +void xfs_inode_shrinker_unregister(struct xfs_mount *mp); #endif -- cgit v1.2.3 From fda168c24586ab8e01b0eb68028d78fe3e4fb71a Mon Sep 17 00:00:00 2001 From: Zhitong Wang Date: Tue, 23 Mar 2010 09:51:22 +1100 Subject: xfs: Fix integer overflow in fs/xfs/linux-2.6/xfs_ioctl*.c The am_hreq.opcount field in the xfs_attrmulti_by_handle() interface is not bounded correctly. The opcount is used to determine the size of the buffer required. The size is bounded, but can overflow and so the size checks may not be sufficient to catch invalid opcounts. Fix it by catching opcount values that would cause overflows before calculating the size. Signed-off-by: Zhitong Wang Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_ioctl.c | 4 ++++ fs/xfs/linux-2.6/xfs_ioctl32.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 7b26cc2fd284..699b60cbab9c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -527,6 +527,10 @@ xfs_attrmulti_by_handle( if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) return -XFS_ERROR(EFAULT); + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) + return -E2BIG; + dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 593c05b4df8d..9287135e9bfc 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle( sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) return -XFS_ERROR(EFAULT); + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) + return -E2BIG; + dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); if (IS_ERR(dentry)) return PTR_ERR(dentry); -- cgit v1.2.3 From e2a07812e93d4a51b1b1a6f15145a1634948db47 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 23 Mar 2010 09:52:55 +1100 Subject: xfs: add blockdev name to kthreads This allows to see in `ps` and similar tools which kthreads are allotted to which block device/filesystem, similar to what jbd2 does. As the process name is a fixed 16-char array, no extra space is needed in tasks. PID TTY STAT TIME COMMAND 2 ? S 0:00 [kthreadd] 197 ? S 0:00 \_ [jbd2/sda2-8] 198 ? S 0:00 \_ [ext4-dio-unwrit] 204 ? S 0:00 \_ [flush-8:0] 2647 ? S 0:00 \_ [xfs_mru_cache] 2648 ? S 0:00 \_ [xfslogd/0] 2649 ? S 0:00 \_ [xfsdatad/0] 2650 ? S 0:00 \_ [xfsconvertd/0] 2651 ? S 0:00 \_ [xfsbufd/ram0] 2652 ? S 0:00 \_ [xfsaild/ram0] 2653 ? S 0:00 \_ [xfssyncd/ram0] Signed-off-by: Jan Engelhardt Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_buf.c | 10 ++++++---- fs/xfs/linux-2.6/xfs_buf.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 9 +++++---- fs/xfs/linux-2.6/xfs_sync.c | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 44c2b0ef9a41..f7ecc44cbbd3 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1614,7 +1614,8 @@ xfs_mapping_buftarg( STATIC int xfs_alloc_delwrite_queue( - xfs_buftarg_t *btp) + xfs_buftarg_t *btp, + const char *fsname) { int error = 0; @@ -1622,7 +1623,7 @@ xfs_alloc_delwrite_queue( INIT_LIST_HEAD(&btp->bt_delwrite_queue); spin_lock_init(&btp->bt_delwrite_lock); btp->bt_flags = 0; - btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); + btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); if (IS_ERR(btp->bt_task)) { error = PTR_ERR(btp->bt_task); goto out_error; @@ -1635,7 +1636,8 @@ out_error: xfs_buftarg_t * xfs_alloc_buftarg( struct block_device *bdev, - int external) + int external, + const char *fsname) { xfs_buftarg_t *btp; @@ -1647,7 +1649,7 @@ xfs_alloc_buftarg( goto error; if (xfs_mapping_buftarg(btp, bdev)) goto error; - if (xfs_alloc_delwrite_queue(btp)) + if (xfs_alloc_delwrite_queue(btp, fsname)) goto error; xfs_alloc_bufhash(btp, external); return btp; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 386e7361e50e..5fbecefa5dfd 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) /* * Handling of buftargs. */ -extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); +extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 29f1edca76de..e8ad6dd2c10c 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -789,18 +789,18 @@ xfs_open_devices( * Setup xfs_mount buffer target pointers */ error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); + mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); + mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1); + mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -902,7 +902,8 @@ xfsaild_start( struct xfs_ail *ailp) { ailp->xa_target = 0; - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", + ailp->xa_mount->m_fsname); if (IS_ERR(ailp->xa_task)) return -PTR_ERR(ailp->xa_task); return 0; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a427c638d909..a7ba355c21b6 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -660,7 +660,7 @@ xfs_syncd_init( mp->m_sync_work.w_syncer = xfs_sync_worker; mp->m_sync_work.w_mount = mp; mp->m_sync_work.w_completion = NULL; - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); + mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); if (IS_ERR(mp->m_sync_task)) return -PTR_ERR(mp->m_sync_task); return 0; -- cgit v1.2.3 From 4aaf15d1aa9673dd2cc45c48957c946cb4aa2694 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 8 Mar 2010 11:24:07 +1100 Subject: xfs: Add inode pin counts to traces We don't record pin counts in inode events right now, and this makes it difficult to track down problems related to pinning inodes. Add the pin count to the inode trace class and add trace events for pinning and unpinning inodes. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.h | 9 ++++++++- fs/xfs/xfs_inode.c | 2 ++ fs/xfs/xfs_inode_item.c | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index fcaa62f0799e..65371859c753 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -562,18 +562,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class, __field(dev_t, dev) __field(xfs_ino_t, ino) __field(int, count) + __field(int, pincount) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); + __entry->pincount = atomic_read(&ip->i_pincount); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, + __entry->pincount, (char *)__entry->caller_ip) ) @@ -583,6 +586,10 @@ DEFINE_EVENT(xfs_inode_class, name, \ TP_ARGS(ip, caller_ip)) DEFINE_INODE_EVENT(xfs_ihold); DEFINE_INODE_EVENT(xfs_irele); +DEFINE_INODE_EVENT(xfs_inode_pin); +DEFINE_INODE_EVENT(xfs_inode_unpin); +DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); + /* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ DEFINE_INODE_EVENT(xfs_inode); #define xfs_itrace_entry(ip) \ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0ffd56447045..8cd6e8d8fe9c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2449,6 +2449,8 @@ xfs_iunpin_nowait( { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + trace_xfs_inode_unpin_nowait(ip, _RET_IP_); + /* Give the log a push to start the unpinning I/O */ xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 32e4188411c2..03471757bc88 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -543,6 +543,7 @@ xfs_inode_item_pin( { ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); + trace_xfs_inode_pin(iip->ili_inode, _RET_IP_); atomic_inc(&iip->ili_inode->i_pincount); } @@ -561,6 +562,7 @@ xfs_inode_item_unpin( { struct xfs_inode *ip = iip->ili_inode; + trace_xfs_inode_unpin(ip, _RET_IP_); ASSERT(atomic_read(&ip->i_pincount) > 0); if (atomic_dec_and_test(&ip->i_pincount)) wake_up(&ip->i_ipin_wait); -- cgit v1.2.3 From 9abbc539bf7f299819ad0a235064a1b643ab6407 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Apr 2010 15:06:46 +1000 Subject: xfs: add log item recovery tracing Currently there is no tracing in log recovery, so it is difficult to determine what is going on when something goes wrong. Add tracing for log item recovery to provide visibility into the log recovery process. The tracing added shows regions being extracted from the log transactions and added to the transaction hash forming recovery items, followed by the reordering, cancelling and finally recovery of the items. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.c | 3 + fs/xfs/linux-2.6/xfs_trace.h | 138 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_buf_item.h | 2 +- fs/xfs/xfs_log_recover.c | 44 +++++++++++--- fs/xfs/xfs_trans.h | 9 +++ 5 files changed, 187 insertions(+), 9 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 5a107601e969..2a460581308f 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -50,6 +50,9 @@ #include "xfs_aops.h" #include "quota/xfs_dquot_item.h" #include "quota/xfs_dquot.h" +#include "xfs_log_recover.h" +#include "xfs_buf_item.h" +#include "xfs_inode_item.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 65371859c753..33f7d2b7afeb 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -32,6 +32,10 @@ struct xfs_da_node_entry; struct xfs_dquot; struct xlog_ticket; struct log; +struct xlog_recover; +struct xlog_recover_item; +struct xfs_buf_log_format; +struct xfs_inode_log_format; DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), @@ -1502,6 +1506,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); +DECLARE_EVENT_CLASS(xfs_log_recover_item_class, + TP_PROTO(struct log *log, struct xlog_recover *trans, + struct xlog_recover_item *item, int pass), + TP_ARGS(log, trans, item, pass), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, item) + __field(xlog_tid_t, tid) + __field(int, type) + __field(int, pass) + __field(int, count) + __field(int, total) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->item = (unsigned long)item; + __entry->tid = trans->r_log_tid; + __entry->type = ITEM_TYPE(item); + __entry->pass = pass; + __entry->count = item->ri_cnt; + __entry->total = item->ri_total; + ), + TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " + "item region count/total %d/%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tid, + __entry->pass, + (void *)__entry->item, + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __entry->count, + __entry->total) +) + +#define DEFINE_LOG_RECOVER_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_item_class, name, \ + TP_PROTO(struct log *log, struct xlog_recover *trans, \ + struct xlog_recover_item *item, int pass), \ + TP_ARGS(log, trans, item, pass)) + +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); + +DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), + TP_ARGS(log, buf_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__int64_t, blkno) + __field(unsigned short, len) + __field(unsigned short, flags) + __field(unsigned short, size) + __field(unsigned int, map_size) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->blkno = buf_f->blf_blkno; + __entry->len = buf_f->blf_len; + __entry->flags = buf_f->blf_flags; + __entry->size = buf_f->blf_size; + __entry->map_size = buf_f->blf_map_size; + ), + TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " + "map_size %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->blkno, + __entry->len, + __entry->flags, + __entry->size, + __entry->map_size) +) + +#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ + TP_ARGS(log, buf_f)) + +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); + +DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), + TP_ARGS(log, in_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned short, size) + __field(int, fields) + __field(unsigned short, asize) + __field(unsigned short, dsize) + __field(__int64_t, blkno) + __field(int, len) + __field(int, boffset) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->ino = in_f->ilf_ino; + __entry->size = in_f->ilf_size; + __entry->fields = in_f->ilf_fields; + __entry->asize = in_f->ilf_asize; + __entry->dsize = in_f->ilf_dsize; + __entry->blkno = in_f->ilf_blkno; + __entry->len = in_f->ilf_len; + __entry->boffset = in_f->ilf_boffset; + ), + TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " + "dsize %d, blkno 0x%llx, len %d, boffset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->fields, + __entry->asize, + __entry->dsize, + __entry->blkno, + __entry->len, + __entry->boffset) +) +#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ + TP_ARGS(log, in_f)) + +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 217f34af00cb..df4454511f73 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -26,7 +26,7 @@ extern kmem_zone_t *xfs_buf_item_zone; * have been logged. * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. */ -typedef struct xfs_buf_log_format_t { +typedef struct xfs_buf_log_format { unsigned short blf_type; /* buf log item type indicator */ unsigned short blf_size; /* size of this item */ ushort blf_flags; /* misc state */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 22e6efdc17ea..f21eb8ad2d97 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1408,6 +1408,7 @@ xlog_recover_add_item( STATIC int xlog_recover_add_to_cont_trans( + struct log *log, xlog_recover_t *trans, xfs_caddr_t dp, int len) @@ -1434,6 +1435,7 @@ xlog_recover_add_to_cont_trans( memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; + trace_xfs_log_recover_item_add_cont(log, trans, item, 0); return 0; } @@ -1452,6 +1454,7 @@ xlog_recover_add_to_cont_trans( */ STATIC int xlog_recover_add_to_trans( + struct log *log, xlog_recover_t *trans, xfs_caddr_t dp, int len) @@ -1510,6 +1513,7 @@ xlog_recover_add_to_trans( item->ri_buf[item->ri_cnt].i_addr = ptr; item->ri_buf[item->ri_cnt].i_len = len; item->ri_cnt++; + trace_xfs_log_recover_item_add(log, trans, item, 0); return 0; } @@ -1521,7 +1525,9 @@ xlog_recover_add_to_trans( */ STATIC int xlog_recover_reorder_trans( - xlog_recover_t *trans) + struct log *log, + xlog_recover_t *trans, + int pass) { xlog_recover_item_t *item, *n; LIST_HEAD(sort_list); @@ -1535,6 +1541,8 @@ xlog_recover_reorder_trans( switch (ITEM_TYPE(item)) { case XFS_LI_BUF: if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { + trace_xfs_log_recover_item_reorder_head(log, + trans, item, pass); list_move(&item->ri_list, &trans->r_itemq); break; } @@ -1543,6 +1551,8 @@ xlog_recover_reorder_trans( case XFS_LI_QUOTAOFF: case XFS_LI_EFD: case XFS_LI_EFI: + trace_xfs_log_recover_item_reorder_tail(log, + trans, item, pass); list_move_tail(&item->ri_list, &trans->r_itemq); break; default: @@ -1592,8 +1602,10 @@ xlog_recover_do_buffer_pass1( /* * If this isn't a cancel buffer item, then just return. */ - if (!(flags & XFS_BLI_CANCEL)) + if (!(flags & XFS_BLI_CANCEL)) { + trace_xfs_log_recover_buf_not_cancel(log, buf_f); return; + } /* * Insert an xfs_buf_cancel record into the hash table of @@ -1627,6 +1639,7 @@ xlog_recover_do_buffer_pass1( while (nextp != NULL) { if (nextp->bc_blkno == blkno && nextp->bc_len == len) { nextp->bc_refcount++; + trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); return; } prevp = nextp; @@ -1640,6 +1653,7 @@ xlog_recover_do_buffer_pass1( bcp->bc_refcount = 1; bcp->bc_next = NULL; prevp->bc_next = bcp; + trace_xfs_log_recover_buf_cancel_add(log, buf_f); } /* @@ -1779,6 +1793,8 @@ xlog_recover_do_inode_buffer( unsigned int *data_map = NULL; unsigned int map_size = 0; + trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { case XFS_LI_BUF: data_map = buf_f->blf_data_map; @@ -1874,6 +1890,7 @@ xlog_recover_do_inode_buffer( /*ARGSUSED*/ STATIC void xlog_recover_do_reg_buffer( + struct xfs_mount *mp, xlog_recover_item_t *item, xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) @@ -1885,6 +1902,8 @@ xlog_recover_do_reg_buffer( unsigned int map_size = 0; int error; + trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); + switch (buf_f->blf_type) { case XFS_LI_BUF: data_map = buf_f->blf_data_map; @@ -2083,6 +2102,8 @@ xlog_recover_do_dquot_buffer( { uint type; + trace_xfs_log_recover_buf_dquot_buf(log, buf_f); + /* * Filesystems are required to send in quota flags at mount time. */ @@ -2103,7 +2124,7 @@ xlog_recover_do_dquot_buffer( if (log->l_quotaoffs_flag & type) return; - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } /* @@ -2164,9 +2185,11 @@ xlog_recover_do_buffer_trans( */ cancel = xlog_recover_do_buffer_pass2(log, buf_f); if (cancel) { + trace_xfs_log_recover_buf_cancel(log, buf_f); return 0; } } + trace_xfs_log_recover_buf_recover(log, buf_f); switch (buf_f->blf_type) { case XFS_LI_BUF: blkno = buf_f->blf_blkno; @@ -2204,7 +2227,7 @@ xlog_recover_do_buffer_trans( (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { - xlog_recover_do_reg_buffer(item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } if (error) return XFS_ERROR(error); @@ -2284,8 +2307,10 @@ xlog_recover_do_inode_trans( if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len, 0)) { error = 0; + trace_xfs_log_recover_inode_cancel(log, in_f); goto error; } + trace_xfs_log_recover_inode_recover(log, in_f); bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, XBF_LOCK); @@ -2337,6 +2362,7 @@ xlog_recover_do_inode_trans( /* do nothing */ } else { xfs_buf_relse(bp); + trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto error; } @@ -2758,11 +2784,12 @@ xlog_recover_do_trans( int error = 0; xlog_recover_item_t *item; - error = xlog_recover_reorder_trans(trans); + error = xlog_recover_reorder_trans(log, trans, pass); if (error) return error; list_for_each_entry(item, &trans->r_itemq, ri_list) { + trace_xfs_log_recover_item_recover(log, trans, item, pass); switch (ITEM_TYPE(item)) { case XFS_LI_BUF: error = xlog_recover_do_buffer_trans(log, item, pass); @@ -2919,8 +2946,9 @@ xlog_recover_process_data( error = xlog_recover_unmount_trans(trans); break; case XLOG_WAS_CONT_TRANS: - error = xlog_recover_add_to_cont_trans(trans, - dp, be32_to_cpu(ohead->oh_len)); + error = xlog_recover_add_to_cont_trans(log, + trans, dp, + be32_to_cpu(ohead->oh_len)); break; case XLOG_START_TRANS: xlog_warn( @@ -2930,7 +2958,7 @@ xlog_recover_process_data( break; case 0: case XLOG_CONTINUE_TRANS: - error = xlog_recover_add_to_trans(trans, + error = xlog_recover_add_to_trans(log, trans, dp, be32_to_cpu(ohead->oh_len)); break; default: diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 82574ef36580..c62beee0921e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -49,6 +49,15 @@ typedef struct xfs_trans_header { #define XFS_LI_DQUOT 0x123d #define XFS_LI_QUOTAOFF 0x123e +#define XFS_LI_TYPE_DESC \ + { XFS_LI_EFI, "XFS_LI_EFI" }, \ + { XFS_LI_EFD, "XFS_LI_EFD" }, \ + { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \ + { XFS_LI_INODE, "XFS_LI_INODE" }, \ + { XFS_LI_BUF, "XFS_LI_BUF" }, \ + { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ + { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" } + /* * Transaction types. Used to distinguish types of buffers. */ -- cgit v1.2.3 From 368e136174344c417bad6ff0380b7b3f574bf120 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Apr 2010 15:06:50 +1000 Subject: xfs: remove duplicate code from dquot reclaim The dquot shaker and the free-list reclaim code use exactly the same algorithm but the code is duplicated and slightly different in each case. Make the shaker code use the single dquot reclaim code to remove the code duplication. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.h | 2 - fs/xfs/quota/xfs_qm.c | 264 ++++++++++++------------------------------- 2 files changed, 73 insertions(+), 193 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 33f7d2b7afeb..ce6a968a8f40 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -653,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \ TP_PROTO(struct xfs_dquot *dqp), \ TP_ARGS(dqp)) DEFINE_DQUOT_EVENT(xfs_dqadjust); -DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); -DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 855827320ff6..5ca65c834bbd 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1926,53 +1926,46 @@ xfs_qm_init_quotainos( } + /* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - * XXXsup merge this with qm_reclaim_one(). + * Just pop the least recently used dquot off the freelist and + * recycle it. The returned dquot is locked. */ -STATIC int -xfs_qm_shake_freelist( - int howmany) +STATIC xfs_dquot_t * +xfs_qm_dqreclaim_one(void) { - int nreclaimed; - xfs_dqhash_t *hash; - xfs_dquot_t *dqp, *nextdqp; + xfs_dquot_t *dqpout; + xfs_dquot_t *dqp; int restarts; - int nflushes; - if (howmany <= 0) - return 0; - - nreclaimed = 0; restarts = 0; - nflushes = 0; + dqpout = NULL; -#ifdef QUOTADEBUG - cmn_err(CE_DEBUG, "Shake free 0x%x", howmany); -#endif - /* lock order is : hashchainlock, freelistlock, mplistlock */ - tryagain: + /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ +startagain: xfs_qm_freelist_lock(xfs_Gqm); - for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; - ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) && - nreclaimed < howmany); ) { + FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) { struct xfs_mount *mp = dqp->q_mount; xfs_dqlock(dqp); /* * We are racing with dqlookup here. Naturally we don't - * want to reclaim a dquot that lookup wants. + * want to reclaim a dquot that lookup wants. We release the + * freelist lock and start over, so that lookup will grab + * both the dquot and the freelistlock. */ if (dqp->dq_flags & XFS_DQ_WANT) { + ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); + + trace_xfs_dqreclaim_want(dqp); + xfs_dqunlock(dqp); xfs_qm_freelist_unlock(xfs_Gqm); if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return nreclaimed; + return NULL; XQM_STATS_INC(xqmstats.xs_qm_dqwants); - goto tryagain; + goto startagain; } /* @@ -1985,19 +1978,22 @@ xfs_qm_shake_freelist( ASSERT(! XFS_DQ_IS_DIRTY(dqp)); ASSERT(dqp->HL_PREVP == NULL); ASSERT(list_empty(&dqp->q_mplist)); + XQM_FREELIST_REMOVE(dqp); + xfs_dqunlock(dqp); + dqpout = dqp; XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - nextdqp = dqp->dq_flnext; - goto off_freelist; + break; } + ASSERT(dqp->q_hash); ASSERT(!list_empty(&dqp->q_mplist)); + /* * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ if (!xfs_dqflock_nowait(dqp)) { xfs_dqunlock(dqp); - dqp = dqp->dq_flnext; continue; } @@ -2010,21 +2006,21 @@ xfs_qm_shake_freelist( if (XFS_DQ_IS_DIRTY(dqp)) { int error; - trace_xfs_dqshake_dirty(dqp); + trace_xfs_dqreclaim_dirty(dqp); /* * We flush it delayed write, so don't bother - * releasing the mplock. + * releasing the freelist lock. */ error = xfs_qm_dqflush(dqp, 0); if (error) { xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dqflush_all: dquot %p flush failed", dqp); + "xfs_qm_dqreclaim: dquot %p flush failed", dqp); } xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ - dqp = dqp->dq_flnext; continue; } + /* * We're trying to get the hashlock out of order. This races * with dqlookup; so, we giveup and goto the next dquot if @@ -2033,57 +2029,71 @@ xfs_qm_shake_freelist( * waiting for the freelist lock. */ if (!mutex_trylock(&dqp->q_hash->qh_lock)) { - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - dqp = dqp->dq_flnext; - continue; + restarts++; + goto dqfunlock; } + /* * This races with dquot allocation code as well as dqflush_all * and reclaim code. So, if we failed to grab the mplist lock, * giveup everything and start over. */ - hash = dqp->q_hash; - ASSERT(hash); if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { - /* XXX put a sentinel so that we can come back here */ + restarts++; + mutex_unlock(&dqp->q_hash->qh_lock); xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - mutex_unlock(&hash->qh_lock); xfs_qm_freelist_unlock(xfs_Gqm); - if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return nreclaimed; - goto tryagain; + if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) + return NULL; + goto startagain; } - trace_xfs_dqshake_unlink(dqp); - -#ifdef QUOTADEBUG - cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", - dqp, be32_to_cpu(dqp->q_core.d_id)); -#endif ASSERT(dqp->q_nrefs == 0); - nextdqp = dqp->dq_flnext; - XQM_HASHLIST_REMOVE(hash, dqp); list_del_init(&dqp->q_mplist); mp->m_quotainfo->qi_dquots--; mp->m_quotainfo->qi_dqreclaims++; - xfs_dqfunlock(dqp); - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - mutex_unlock(&hash->qh_lock); - - off_freelist: + XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); XQM_FREELIST_REMOVE(dqp); + dqpout = dqp; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + mutex_unlock(&dqp->q_hash->qh_lock); +dqfunlock: + xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - nreclaimed++; - XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims); - xfs_qm_dqdestroy(dqp); - dqp = nextdqp; + if (dqpout) + break; + if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + return NULL; } xfs_qm_freelist_unlock(xfs_Gqm); - return nreclaimed; + return dqpout; } +/* + * Traverse the freelist of dquots and attempt to reclaim a maximum of + * 'howmany' dquots. This operation races with dqlookup(), and attempts to + * favor the lookup function ... + */ +STATIC int +xfs_qm_shake_freelist( + int howmany) +{ + int nreclaimed = 0; + xfs_dquot_t *dqp; + + if (howmany <= 0) + return 0; + + while (nreclaimed < howmany) { + dqp = xfs_qm_dqreclaim_one(); + if (!dqp) + return nreclaimed; + xfs_qm_dqdestroy(dqp); + nreclaimed++; + } + return nreclaimed; +} /* * The kmem_shake interface is invoked when memory is running low. @@ -2115,134 +2125,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) } -/* - * Just pop the least recently used dquot off the freelist and - * recycle it. The returned dquot is locked. - */ -STATIC xfs_dquot_t * -xfs_qm_dqreclaim_one(void) -{ - xfs_dquot_t *dqpout; - xfs_dquot_t *dqp; - int restarts; - int nflushes; - - restarts = 0; - dqpout = NULL; - nflushes = 0; - - /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ - startagain: - xfs_qm_freelist_lock(xfs_Gqm); - - FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) { - struct xfs_mount *mp = dqp->q_mount; - xfs_dqlock(dqp); - - /* - * We are racing with dqlookup here. Naturally we don't - * want to reclaim a dquot that lookup wants. We release the - * freelist lock and start over, so that lookup will grab - * both the dquot and the freelistlock. - */ - if (dqp->dq_flags & XFS_DQ_WANT) { - ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); - - trace_xfs_dqreclaim_want(dqp); - - xfs_dqunlock(dqp); - xfs_qm_freelist_unlock(xfs_Gqm); - if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return NULL; - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - goto startagain; - } - - /* - * If the dquot is inactive, we are assured that it is - * not on the mplist or the hashlist, and that makes our - * life easier. - */ - if (dqp->dq_flags & XFS_DQ_INACTIVE) { - ASSERT(mp == NULL); - ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(dqp->HL_PREVP == NULL); - ASSERT(list_empty(&dqp->q_mplist)); - XQM_FREELIST_REMOVE(dqp); - xfs_dqunlock(dqp); - dqpout = dqp; - XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - break; - } - - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); - - /* - * Try to grab the flush lock. If this dquot is in the process of - * getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) { - xfs_dqunlock(dqp); - continue; - } - - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, 0); - if (error) { - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dqreclaim: dquot %p flush failed", dqp); - } - xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ - continue; - } - - if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - continue; - } - - if (!mutex_trylock(&dqp->q_hash->qh_lock)) - goto mplistunlock; - - trace_xfs_dqreclaim_unlink(dqp); - - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); - XQM_FREELIST_REMOVE(dqp); - dqpout = dqp; - mutex_unlock(&dqp->q_hash->qh_lock); - mplistunlock: - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - if (dqpout) - break; - } - - xfs_qm_freelist_unlock(xfs_Gqm); - return dqpout; -} - - /*------------------------------------------------------------------*/ /* -- cgit v1.2.3 From e6a81f13aa9aa20ef03174210aed24791865b05e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Apr 2010 15:06:51 +1000 Subject: xfs: convert the dquot hash list to use list heads Convert the dquot hash list on the filesystem to use listhead infrastructure rather than the roll-your-own in the quota code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_trace.h | 1 - fs/xfs/quota/xfs_dquot.c | 32 ++++++++++---------------------- fs/xfs/quota/xfs_dquot.h | 9 ++------- fs/xfs/quota/xfs_qm.c | 9 +++++---- fs/xfs/quota/xfs_qm_syscalls.c | 34 +++++++++++++--------------------- fs/xfs/quota/xfs_quota_priv.h | 33 --------------------------------- 6 files changed, 30 insertions(+), 88 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index ce6a968a8f40..8a319cfd2901 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -667,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail); DEFINE_DQUOT_EVENT(xfs_dqlookup_found); DEFINE_DQUOT_EVENT(xfs_dqlookup_want); DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); -DEFINE_DQUOT_EVENT(xfs_dqlookup_move); DEFINE_DQUOT_EVENT(xfs_dqlookup_done); DEFINE_DQUOT_EVENT(xfs_dqget_hit); DEFINE_DQUOT_EVENT(xfs_dqget_miss); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 838289b92bb9..ad64ab62d9c5 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -122,8 +122,7 @@ xfs_qm_dqinit( dqp->q_nrefs = 0; dqp->q_blkno = 0; INIT_LIST_HEAD(&dqp->q_mplist); - dqp->HL_NEXT = NULL; - dqp->HL_PREVP = NULL; + INIT_LIST_HEAD(&dqp->q_hashlist); dqp->q_bufoffset = 0; dqp->q_fileoffset = 0; dqp->q_transp = NULL; @@ -752,7 +751,6 @@ xfs_qm_dqlookup( { xfs_dquot_t *dqp; uint flist_locked; - xfs_dquot_t *d; ASSERT(mutex_is_locked(&qh->qh_lock)); @@ -761,7 +759,7 @@ xfs_qm_dqlookup( /* * Traverse the hashchain looking for a match */ - for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) { + list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { /* * We already have the hashlock. We don't need the * dqlock to look at the id field of the dquot, since the @@ -828,21 +826,10 @@ xfs_qm_dqlookup( * move the dquot to the front of the hashchain */ ASSERT(mutex_is_locked(&qh->qh_lock)); - if (dqp->HL_PREVP != &qh->qh_next) { - trace_xfs_dqlookup_move(dqp); - if ((d = dqp->HL_NEXT)) - d->HL_PREVP = dqp->HL_PREVP; - *(dqp->HL_PREVP) = d; - d = qh->qh_next; - d->HL_PREVP = &dqp->HL_NEXT; - dqp->HL_NEXT = d; - dqp->HL_PREVP = &qh->qh_next; - qh->qh_next = dqp; - } + list_move(&dqp->q_hashlist, &qh->qh_list); trace_xfs_dqlookup_done(dqp); *O_dqpp = dqp; - ASSERT(mutex_is_locked(&qh->qh_lock)); - return (0); + return 0; } } @@ -1034,7 +1021,8 @@ xfs_qm_dqget( */ ASSERT(mutex_is_locked(&h->qh_lock)); dqp->q_hash = h; - XQM_HASHLIST_INSERT(h, dqp); + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; /* * Attach this dquot to this filesystem's list of all dquots, @@ -1387,7 +1375,7 @@ int xfs_qm_dqpurge( xfs_dquot_t *dqp) { - xfs_dqhash_t *thishash; + xfs_dqhash_t *qh = dqp->q_hash; xfs_mount_t *mp = dqp->q_mount; ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); @@ -1453,8 +1441,8 @@ xfs_qm_dqpurge( ASSERT(XFS_FORCED_SHUTDOWN(mp) || !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); - thishash = dqp->q_hash; - XQM_HASHLIST_REMOVE(thishash, dqp); + list_del_init(&dqp->q_hashlist); + qh->qh_version++; list_del_init(&dqp->q_mplist); mp->m_quotainfo->qi_dqreclaims++; mp->m_quotainfo->qi_dquots--; @@ -1470,7 +1458,7 @@ xfs_qm_dqpurge( memset(&dqp->q_core, 0, sizeof(dqp->q_core)); xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - mutex_unlock(&thishash->qh_lock); + mutex_unlock(&qh->qh_lock); return (0); } diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 6992a67c165a..169b3c24af79 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -33,17 +33,12 @@ * The hash chain headers (hash buckets) */ typedef struct xfs_dqhash { - struct xfs_dquot *qh_next; + struct list_head qh_list; struct mutex qh_lock; uint qh_version; /* ever increasing version */ uint qh_nelems; /* number of dquots on the list */ } xfs_dqhash_t; -typedef struct xfs_dqlink { - struct xfs_dquot *ql_next; /* forward link */ - struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */ -} xfs_dqlink_t; - struct xfs_mount; struct xfs_trans; @@ -57,7 +52,6 @@ struct xfs_trans; typedef struct xfs_dqmarker { struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */ struct xfs_dquot*dqm_flprev; - xfs_dqlink_t dqm_hashlist; /* link to the hash chain */ uint dqm_flags; /* various flags (XFS_DQ_*) */ } xfs_dqmarker_t; @@ -67,6 +61,7 @@ typedef struct xfs_dqmarker { typedef struct xfs_dquot { xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */ struct list_head q_mplist; /* mount's list of dquots */ + struct list_head q_hashlist; /* mount's list of dquots */ xfs_dqhash_t *q_hash; /* the hashchain header */ struct xfs_mount*q_mount; /* filesystem this relates to */ struct xfs_trans*q_transp; /* trans this belongs to currently */ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5ca65c834bbd..08e97f1ef653 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -277,7 +277,7 @@ xfs_qm_rele_quotafs_ref( if (dqp->dq_flags & XFS_DQ_INACTIVE) { ASSERT(dqp->q_mount == NULL); ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(dqp->HL_PREVP == NULL); + ASSERT(list_empty(&dqp->q_hashlist)); ASSERT(list_empty(&dqp->q_mplist)); XQM_FREELIST_REMOVE(dqp); xfs_dqunlock(dqp); @@ -1176,7 +1176,7 @@ xfs_qm_list_init( int n) { mutex_init(&list->qh_lock); - list->qh_next = NULL; + INIT_LIST_HEAD(&list->qh_list); list->qh_version = 0; list->qh_nelems = 0; } @@ -1976,7 +1976,7 @@ startagain: if (dqp->dq_flags & XFS_DQ_INACTIVE) { ASSERT(mp == NULL); ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(dqp->HL_PREVP == NULL); + ASSERT(list_empty(&dqp->q_hashlist)); ASSERT(list_empty(&dqp->q_mplist)); XQM_FREELIST_REMOVE(dqp); xfs_dqunlock(dqp); @@ -2053,7 +2053,8 @@ startagain: list_del_init(&dqp->q_mplist); mp->m_quotainfo->qi_dquots--; mp->m_quotainfo->qi_dqreclaims++; - XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; XQM_FREELIST_REMOVE(dqp); dqpout = dqp; mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c54fa7790bd8..c82e319f9df4 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -932,6 +932,7 @@ struct mutex qcheck_lock; typedef struct dqtest { xfs_dqmarker_t q_lists; + struct list_head q_hashlist; xfs_dqhash_t *q_hash; /* the hashchain header */ xfs_mount_t *q_mount; /* filesystem this relates to */ xfs_dqid_t d_id; /* user id or group id */ @@ -942,14 +943,9 @@ typedef struct dqtest { STATIC void xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) { - xfs_dquot_t *d; - if (((d) = (h)->qh_next)) - (d)->HL_PREVP = &((dqp)->HL_NEXT); - (dqp)->HL_NEXT = d; - (dqp)->HL_PREVP = &((h)->qh_next); - (h)->qh_next = (xfs_dquot_t *)dqp; - (h)->qh_version++; - (h)->qh_nelems++; + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + h->qh_nelems++; } STATIC void xfs_qm_dqtest_print( @@ -1061,9 +1057,7 @@ xfs_qm_internalqcheck_dqget( xfs_dqhash_t *h; h = DQTEST_HASH(mp, id, type); - for (d = (xfs_dqtest_t *) h->qh_next; d != NULL; - d = (xfs_dqtest_t *) d->HL_NEXT) { - /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */ + list_for_each_entry(d, &h->qh_list, q_hashlist) { if (d->d_id == id && mp == d->q_mount) { *O_dq = d; return (0); @@ -1074,6 +1068,7 @@ xfs_qm_internalqcheck_dqget( d->d_id = id; d->q_mount = mp; d->q_hash = h; + INIT_LIST_HEAD(&d->q_hashlist); xfs_qm_hashinsert(h, d); *O_dq = d; return (0); @@ -1180,8 +1175,6 @@ xfs_qm_internalqcheck( xfs_ino_t lastino; int done, count; int i; - xfs_dqtest_t *d, *e; - xfs_dqhash_t *h1; int error; lastino = 0; @@ -1221,19 +1214,18 @@ xfs_qm_internalqcheck( } cmn_err(CE_DEBUG, "Checking results against system dquots"); for (i = 0; i < qmtest_hashmask; i++) { - h1 = &qmtest_udqtab[i]; - for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { + xfs_dqtest_t *d, *n; + xfs_dqhash_t *h; + + h = &qmtest_udqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { xfs_dqtest_cmp(d); - e = (xfs_dqtest_t *) d->HL_NEXT; kmem_free(d); - d = e; } - h1 = &qmtest_gdqtab[i]; - for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { + h = &qmtest_gdqtab[i]; + list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) { xfs_dqtest_cmp(d); - e = (xfs_dqtest_t *) d->HL_NEXT; kmem_free(d); - d = e; } } diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 6f4bbae51aca..3a1b9aa763fc 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -72,46 +72,13 @@ !dqp->q_core.d_rtbcount && \ !dqp->q_core.d_icount) -#define HL_PREVP dq_hashlist.ql_prevp -#define HL_NEXT dq_hashlist.ql_next - - -#define _LIST_REMOVE(h, dqp, PVP, NXT) \ - { \ - xfs_dquot_t *d; \ - if (((d) = (dqp)->NXT)) \ - (d)->PVP = (dqp)->PVP; \ - *((dqp)->PVP) = d; \ - (dqp)->NXT = NULL; \ - (dqp)->PVP = NULL; \ - (h)->qh_version++; \ - (h)->qh_nelems--; \ - } - -#define _LIST_INSERT(h, dqp, PVP, NXT) \ - { \ - xfs_dquot_t *d; \ - if (((d) = (h)->qh_next)) \ - (d)->PVP = &((dqp)->NXT); \ - (dqp)->NXT = d; \ - (dqp)->PVP = &((h)->qh_next); \ - (h)->qh_next = dqp; \ - (h)->qh_version++; \ - (h)->qh_nelems++; \ - } - #define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ (dqp) = (dqp)->dq_flnext) -#define XQM_HASHLIST_INSERT(h, dqp) \ - _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT) - #define XQM_FREELIST_INSERT(h, dqp) \ xfs_qm_freelist_append(h, dqp) -#define XQM_HASHLIST_REMOVE(h, dqp) \ - _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT) #define XQM_FREELIST_REMOVE(dqp) \ xfs_qm_freelist_unlink(dqp) -- cgit v1.2.3 From df308bcfec27e0c6bc83715dfd417caff5c33f19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Mar 2010 10:59:16 +0000 Subject: xfs: remove periodic superblock writeback All modifications to the superblock are done transactional through xfs_trans_log_buf, so there is no reason to initiate periodic asynchronous writeback. This only removes the superblock from the delwri list and will lead to sub-optimal I/O scheduling. Cut down xfs_sync_fsdata now that it's only used for synchronous superblock writes and move the log coverage checks into the two callers. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 90 ++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 63 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a7ba355c21b6..728db015f39c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -356,68 +356,24 @@ xfs_commit_dummy_trans( STATIC int xfs_sync_fsdata( - struct xfs_mount *mp, - int flags) + struct xfs_mount *mp) { struct xfs_buf *bp; - struct xfs_buf_log_item *bip; - int error = 0; /* - * If this is xfssyncd() then only sync the superblock if we can - * lock it without sleeping and it is not pinned. + * If the buffer is pinned then push on the log so we won't get stuck + * waiting in the write for someone, maybe ourselves, to flush the log. + * + * Even though we just pushed the log above, we did not have the + * superblock buffer locked at that point so it can become pinned in + * between there and here. */ - if (flags & SYNC_TRYLOCK) { - ASSERT(!(flags & SYNC_WAIT)); - - bp = xfs_getsb(mp, XBF_TRYLOCK); - if (!bp) - goto out; - - bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *); - if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp)) - goto out_brelse; - } else { - bp = xfs_getsb(mp, 0); - - /* - * If the buffer is pinned then push on the log so we won't - * get stuck waiting in the write for someone, maybe - * ourselves, to flush the log. - * - * Even though we just pushed the log above, we did not have - * the superblock buffer locked at that point so it can - * become pinned in between there and here. - */ - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0); - } - - - if (flags & SYNC_WAIT) - XFS_BUF_UNASYNC(bp); - else - XFS_BUF_ASYNC(bp); - - error = xfs_bwrite(mp, bp); - if (error) - return error; - - /* - * If this is a data integrity sync make sure all pending buffers - * are flushed out for the log coverage check below. - */ - if (flags & SYNC_WAIT) - xfs_flush_buftarg(mp->m_ddev_targp, 1); - - if (xfs_log_need_covered(mp)) - error = xfs_commit_dummy_trans(mp, flags); - return error; + bp = xfs_getsb(mp, 0); + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, 0); - out_brelse: - xfs_buf_relse(bp); - out: - return error; + XFS_BUF_UNASYNC(bp); + return xfs_bwrite(mp, bp); } /* @@ -441,7 +397,7 @@ int xfs_quiesce_data( struct xfs_mount *mp) { - int error; + int error, error2 = 0; /* push non-blocking */ xfs_sync_data(mp, 0); @@ -452,13 +408,20 @@ xfs_quiesce_data( xfs_qm_sync(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ - error = xfs_sync_fsdata(mp, SYNC_WAIT); + error = xfs_sync_fsdata(mp); + + /* make sure all delwri buffers are written out */ + xfs_flush_buftarg(mp->m_ddev_targp, 1); + + /* mark the log as covered if needed */ + if (xfs_log_need_covered(mp)) + error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) XFS_bflush(mp->m_rtdev_targp); - return error; + return error ? error : error2; } STATIC void @@ -581,9 +544,9 @@ xfs_flush_inodes( } /* - * Every sync period we need to unpin all items, reclaim inodes, sync - * quota and write out the superblock. We might need to cover the log - * to indicate it is idle. + * Every sync period we need to unpin all items, reclaim inodes and sync + * disk quotas. We might need to cover the log to indicate that the + * filesystem is idle. */ STATIC void xfs_sync_worker( @@ -597,7 +560,8 @@ xfs_sync_worker( xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); - error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, 0); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); -- cgit v1.2.3 From 8c38366f99f83a7fa441e0c0669fefc18615e005 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Mar 2010 10:59:40 +0000 Subject: xfs: enforce synchronous writes in xfs_bwrite xfs_bwrite is used with the intention of synchronously writing out buffers, but currently it does not actually clear the async flag if that's left from previous writes but instead implements async behaviour if it finds it. Remove the code handling asynchronous writes as we've got rid of those entirely outside of the log and delwri buffers, and make sure that we clear the async and read flags before writing the buffer. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 17 ++++++----------- fs/xfs/linux-2.6/xfs_sync.c | 1 - 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f7ecc44cbbd3..f01de3c55c43 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1007,25 +1007,20 @@ xfs_bwrite( struct xfs_mount *mp, struct xfs_buf *bp) { - int iowait = (bp->b_flags & XBF_ASYNC) == 0; - int error = 0; + int error; bp->b_strat = xfs_bdstrat_cb; bp->b_mount = mp; bp->b_flags |= XBF_WRITE; - if (!iowait) - bp->b_flags |= _XBF_RUN_QUEUES; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ); xfs_buf_delwri_dequeue(bp); xfs_buf_iostrategy(bp); - if (iowait) { - error = xfs_buf_iowait(bp); - if (error) - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - xfs_buf_relse(bp); - } - + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); return error; } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 728db015f39c..3884e20bc14e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -372,7 +372,6 @@ xfs_sync_fsdata( if (XFS_BUF_ISPINNED(bp)) xfs_log_force(mp, 0); - XFS_BUF_UNASYNC(bp); return xfs_bwrite(mp, bp); } -- cgit v1.2.3 From fce1cad651e3cf2779ed8f9e6608daf50d29daaf Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Thu, 25 Mar 2010 17:22:41 +0000 Subject: xfs: xfs_trace.c: duplicated include fs/xfs/linux-2.6/xfs_trace.c: xfs_attr_sf.h is included more than once. Signed-off-by: Andrea Gelmini Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 2a460581308f..207fa77f63ae 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -41,7 +41,6 @@ #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_attr.h" -#include "xfs_attr_sf.h" #include "xfs_attr_leaf.h" #include "xfs_log_priv.h" #include "xfs_buf_item.h" -- cgit v1.2.3 From 37bc5743fdc29f60fb104cd9031babbabddff25a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Apr 2010 17:00:59 +1000 Subject: xfs: wait for direct I/O to complete in fsync and write_inode We need to wait for all pending direct I/O requests before taking care of metadata in fsync and write_inode. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_file.c | 2 ++ fs/xfs/linux-2.6/xfs_super.c | 1 + 2 files changed, 3 insertions(+) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 42dd3bcfba6b..d8fb1b5d6cb5 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -115,6 +115,8 @@ xfs_file_fsync( xfs_iflags_clear(ip, XFS_ITRUNCATED); + xfs_ioend_wait(ip); + /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e8ad6dd2c10c..e9002513e08f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1093,6 +1093,7 @@ xfs_fs_write_inode( * the code will only flush the inode if it isn't already * being flushed. */ + xfs_ioend_wait(ip); xfs_ilock(ip, XFS_ILOCK_SHARED); if (ip->i_update_core) { error = xfs_log_inode(ip); -- cgit v1.2.3 From 2d1ff3c75a4642062d314634290be6d8da4ffb03 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 29 Apr 2010 15:13:56 +1000 Subject: xfs: Make fiemap work in query mode. According to Documentation/filesystems/fiemap.txt, If fm_extent_count is zero, then the fm_extents[] array is ignored (no extents will be returned), and the fm_mapped_extents count will hold the number of extents needed. But as the commit 97db39a1f6f69e906e98118392400de5217aa33a has changed bmv_count to the caller's input buffer, this number query function can't work any more. As this commit is written to change bmv_count from MAXEXTNUM because of ENOMEM. This patch just try to set bm.bmv_count to something sane. Thanks to Dave Chinner for the suggestion. Cc: Eric Sandeen Cc: Alex Elder Reviewed-by: Christoph Hellwig Signed-off-by: Tao Ma --- fs/xfs/linux-2.6/xfs_iops.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index e65a7937f3a4..9c8019c78c92 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -673,7 +673,10 @@ xfs_vn_fiemap( bm.bmv_length = BTOBB(length); /* We add one because in getbmap world count includes the header */ - bm.bmv_count = fieinfo->fi_extents_max + 1; + bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : + fieinfo->fi_extents_max + 1; + bm.bmv_count = min_t(__s32, bm.bmv_count, + (PAGE_SIZE * 16 / sizeof(struct getbmapx))); bm.bmv_iflags = BMV_IF_PREALLOC; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) bm.bmv_iflags |= BMV_IF_ATTRFORK; -- cgit v1.2.3 From 046f1685bb5211c3dea74fda0198c19171e9abc9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:52 +0000 Subject: xfs: remove iomap_target Instead of using the iomap_target field in struct xfs_iomap and the IOMAP_REALTIME flag just use the already existing xfs_find_bdev_for_inode helper. There's some fallout as we need to pass the inode in a few more places, which we also use to sanitize some calling conventions. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 39 ++++++++++++++++++--------------------- fs/xfs/xfs_iomap.c | 6 ------ fs/xfs/xfs_iomap.h | 2 -- 3 files changed, 18 insertions(+), 29 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 0f8b9968a803..49dec212da40 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -103,8 +103,9 @@ xfs_count_page_state( STATIC struct block_device * xfs_find_bdev_for_inode( - struct xfs_inode *ip) + struct inode *inode) { + struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; if (XFS_IS_REALTIME_INODE(ip)) @@ -554,19 +555,19 @@ xfs_add_to_ioend( STATIC void xfs_map_buffer( + struct inode *inode, struct buffer_head *bh, xfs_iomap_t *mp, - xfs_off_t offset, - uint block_bits) + xfs_off_t offset) { sector_t bn; ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); - bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) + - ((offset - mp->iomap_offset) >> block_bits); + bn = (mp->iomap_bn >> (inode->i_blkbits - BBSHIFT)) + + ((offset - mp->iomap_offset) >> inode->i_blkbits); - ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME)); + ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); bh->b_blocknr = bn; set_buffer_mapped(bh); @@ -574,17 +575,17 @@ xfs_map_buffer( STATIC void xfs_map_at_offset( + struct inode *inode, struct buffer_head *bh, - loff_t offset, - int block_bits, - xfs_iomap_t *iomapp) + xfs_iomap_t *iomapp, + xfs_off_t offset) { ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); lock_buffer(bh); - xfs_map_buffer(bh, iomapp, offset, block_bits); - bh->b_bdev = iomapp->iomap_target->bt_bdev; + xfs_map_buffer(inode, bh, iomapp, offset); + bh->b_bdev = xfs_find_bdev_for_inode(inode); set_buffer_mapped(bh); clear_buffer_delay(bh); clear_buffer_unwritten(bh); @@ -750,7 +751,6 @@ xfs_convert_page( xfs_off_t end_offset; unsigned long p_offset; unsigned int type; - int bbits = inode->i_blkbits; int len, page_dirty; int count = 0, done = 0, uptodate = 1; xfs_off_t offset = page_offset(page); @@ -814,7 +814,7 @@ xfs_convert_page( ASSERT(!(mp->iomap_flags & IOMAP_HOLE)); ASSERT(!(mp->iomap_flags & IOMAP_DELAY)); - xfs_map_at_offset(bh, offset, bbits, mp); + xfs_map_at_offset(inode, bh, mp, offset); if (startio) { xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); @@ -1174,8 +1174,7 @@ xfs_page_state_convert( iomap_valid = xfs_iomap_valid(&iomap, offset); } if (iomap_valid) { - xfs_map_at_offset(bh, offset, - inode->i_blkbits, &iomap); + xfs_map_at_offset(inode, bh, &iomap, offset); if (startio) { xfs_add_to_ioend(inode, bh, offset, type, &ioend, @@ -1473,10 +1472,8 @@ __xfs_get_blocks( * For unwritten extents do not report a disk address on * the read case (treat as if we're reading into a hole). */ - if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { - xfs_map_buffer(bh_result, &iomap, offset, - inode->i_blkbits); - } + if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) + xfs_map_buffer(inode, bh_result, &iomap, offset); if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { if (direct) bh_result->b_private = inode; @@ -1488,7 +1485,7 @@ __xfs_get_blocks( * If this is a realtime file, data may be on a different device. * to that pointed to from the buffer_head b_bdev currently. */ - bh_result->b_bdev = iomap.iomap_target->bt_bdev; + bh_result->b_bdev = xfs_find_bdev_for_inode(inode); /* * If we previously allocated a block out beyond eof and we are now @@ -1612,7 +1609,7 @@ xfs_vm_direct_IO( struct block_device *bdev; ssize_t ret; - bdev = xfs_find_bdev_for_inode(XFS_I(inode)); + bdev = xfs_find_bdev_for_inode(inode); iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? IOMAP_UNWRITTEN : IOMAP_READ); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2d9bce7fcf85..7327d3f3bf64 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -72,12 +72,6 @@ xfs_imap_to_bmap( iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); iomapp->iomap_flags = flags; - if (XFS_IS_REALTIME_INODE(ip)) { - iomapp->iomap_flags |= IOMAP_REALTIME; - iomapp->iomap_target = mp->m_rtdev_targp; - } else { - iomapp->iomap_target = mp->m_ddev_targp; - } start_block = imap->br_startblock; if (start_block == HOLESTARTBLOCK) { iomapp->iomap_bn = IOMAP_DADDR_NULL; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 174f29990991..a85fa90f6fe7 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -25,7 +25,6 @@ typedef enum { /* iomap_flags values */ IOMAP_READ = 0, /* mapping for a read */ IOMAP_HOLE = 0x02, /* mapping covers a hole */ IOMAP_DELAY = 0x04, /* mapping covers delalloc region */ - IOMAP_REALTIME = 0x10, /* mapping on the realtime device */ IOMAP_UNWRITTEN = 0x20, /* mapping covers allocated */ /* but uninitialized file data */ IOMAP_NEW = 0x40 /* just allocate */ @@ -71,7 +70,6 @@ typedef enum { typedef struct xfs_iomap { xfs_daddr_t iomap_bn; /* first 512B blk of mapping */ - xfs_buftarg_t *iomap_target; xfs_off_t iomap_offset; /* offset of mapping, bytes */ xfs_off_t iomap_bsize; /* size of mapping, bytes */ xfs_off_t iomap_delta; /* offset into mapping, bytes */ -- cgit v1.2.3 From 9563b3d8998c78d5b7e718b546d5f68037c494fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:53 +0000 Subject: xfs: remove iomap_delta The iomap_delta field in struct xfs_iomap just contains the difference between the offset passed to xfs_iomap and the iomap_offset. Just calculate it in the only caller that cares. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 6 ++++-- fs/xfs/xfs_iomap.c | 1 - fs/xfs/xfs_iomap.h | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 49dec212da40..577d0b448732 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1512,9 +1512,11 @@ __xfs_get_blocks( } if (direct || size > (1 << inode->i_blkbits)) { - ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); + xfs_off_t iomap_delta = offset - iomap.iomap_offset; + + ASSERT(iomap.iomap_bsize - iomap_delta > 0); offset = min_t(xfs_off_t, - iomap.iomap_bsize - iomap.iomap_delta, size); + iomap.iomap_bsize - iomap_delta, size); bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7327d3f3bf64..c6b409e0f013 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -68,7 +68,6 @@ xfs_imap_to_bmap( xfs_fsblock_t start_block; iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); - iomapp->iomap_delta = offset - iomapp->iomap_offset; iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); iomapp->iomap_flags = flags; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index a85fa90f6fe7..db9299631ee4 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -72,7 +72,6 @@ typedef struct xfs_iomap { xfs_daddr_t iomap_bn; /* first 512B blk of mapping */ xfs_off_t iomap_offset; /* offset of mapping, bytes */ xfs_off_t iomap_bsize; /* size of mapping, bytes */ - xfs_off_t iomap_delta; /* offset into mapping, bytes */ iomap_flags_t iomap_flags; } xfs_iomap_t; -- cgit v1.2.3 From 8699bb0a480193e62d5ccb9c86e2c26b407090a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:54 +0000 Subject: xfs: report iomap_offset and iomap_bsize in block base Report the iomap_offset and iomap_bsize fields of struct xfs_iomap in terms of fsblocks instead of in terms of disk blocks. Shift the byte conversions into the callers temporarily, but they will disappear or get cleaned up later. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 36 +++++++++++++++++++++++++----------- fs/xfs/xfs_iomap.c | 5 ++--- 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 577d0b448732..303a779406c0 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -320,11 +320,16 @@ xfs_map_blocks( STATIC int xfs_iomap_valid( + struct inode *inode, xfs_iomap_t *iomapp, loff_t offset) { - return offset >= iomapp->iomap_offset && - offset < iomapp->iomap_offset + iomapp->iomap_bsize; + struct xfs_mount *mp = XFS_I(inode)->i_mount; + xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, iomapp->iomap_offset); + xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, iomapp->iomap_bsize); + + return offset >= iomap_offset && + offset < iomap_offset + iomap_bsize; } /* @@ -561,11 +566,13 @@ xfs_map_buffer( xfs_off_t offset) { sector_t bn; + struct xfs_mount *m = XFS_I(inode)->i_mount; + xfs_off_t iomap_offset = XFS_FSB_TO_B(m, mp->iomap_offset); ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); bn = (mp->iomap_bn >> (inode->i_blkbits - BBSHIFT)) + - ((offset - mp->iomap_offset) >> inode->i_blkbits); + ((offset - iomap_offset) >> inode->i_blkbits); ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); @@ -806,7 +813,7 @@ xfs_convert_page( else type = IOMAP_DELAY; - if (!xfs_iomap_valid(mp, offset)) { + if (!xfs_iomap_valid(inode, mp, offset)) { done = 1; continue; } @@ -1116,7 +1123,7 @@ xfs_page_state_convert( } if (iomap_valid) - iomap_valid = xfs_iomap_valid(&iomap, offset); + iomap_valid = xfs_iomap_valid(inode, &iomap, offset); /* * First case, map an unwritten extent and prepare for @@ -1171,7 +1178,7 @@ xfs_page_state_convert( &iomap, flags); if (err) goto error; - iomap_valid = xfs_iomap_valid(&iomap, offset); + iomap_valid = xfs_iomap_valid(inode, &iomap, offset); } if (iomap_valid) { xfs_map_at_offset(inode, bh, &iomap, offset); @@ -1201,7 +1208,7 @@ xfs_page_state_convert( &iomap, flags); if (err) goto error; - iomap_valid = xfs_iomap_valid(&iomap, offset); + iomap_valid = xfs_iomap_valid(inode, &iomap, offset); } /* @@ -1241,7 +1248,11 @@ xfs_page_state_convert( xfs_start_page_writeback(page, 1, count); if (ioend && iomap_valid) { - offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> + struct xfs_mount *m = XFS_I(inode)->i_mount; + xfs_off_t iomap_offset = XFS_FSB_TO_B(m, iomap.iomap_offset); + xfs_off_t iomap_bsize = XFS_FSB_TO_B(m, iomap.iomap_bsize); + + offset = (iomap_offset + iomap_bsize - 1) >> PAGE_CACHE_SHIFT; tlast = min_t(pgoff_t, offset, last_index); xfs_cluster_write(inode, page->index + 1, &iomap, &ioend, @@ -1512,11 +1523,14 @@ __xfs_get_blocks( } if (direct || size > (1 << inode->i_blkbits)) { - xfs_off_t iomap_delta = offset - iomap.iomap_offset; + struct xfs_mount *mp = XFS_I(inode)->i_mount; + xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, iomap.iomap_offset); + xfs_off_t iomap_delta = offset - iomap_offset; + xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, iomap.iomap_bsize); - ASSERT(iomap.iomap_bsize - iomap_delta > 0); + ASSERT(iomap_bsize - iomap_delta > 0); offset = min_t(xfs_off_t, - iomap.iomap_bsize - iomap_delta, size); + iomap_bsize - iomap_delta, size); bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c6b409e0f013..49b5ad22a9d8 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -64,11 +64,10 @@ xfs_imap_to_bmap( int imaps, /* Number of imap entries */ int flags) { - xfs_mount_t *mp = ip->i_mount; xfs_fsblock_t start_block; - iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); - iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); + iomapp->iomap_offset = imap->br_startoff; + iomapp->iomap_bsize = imap->br_blockcount; iomapp->iomap_flags = flags; start_block = imap->br_startblock; -- cgit v1.2.3 From e513182d4d7ec8f1870ae368c549ef2838e2c105 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:55 +0000 Subject: xfs: report iomap_bn in block base Report the iomap_bn field of struct xfs_iomap in terms of filesystem blocks instead of in terms of bytes. Shift the byte conversions into the caller, and replace the IOMAP_DELAY and IOMAP_HOLE flag checks with checks for HOLESTARTBLOCK and DELAYSTARTBLOCK. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 19 +++++++++++-------- fs/xfs/xfs_iomap.c | 19 +++++-------------- fs/xfs/xfs_iomap.h | 4 ---- 3 files changed, 16 insertions(+), 26 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 303a779406c0..2b09cc34dd07 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -568,10 +568,12 @@ xfs_map_buffer( sector_t bn; struct xfs_mount *m = XFS_I(inode)->i_mount; xfs_off_t iomap_offset = XFS_FSB_TO_B(m, mp->iomap_offset); + xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), mp->iomap_bn); - ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); + ASSERT(mp->iomap_bn != HOLESTARTBLOCK); + ASSERT(mp->iomap_bn != DELAYSTARTBLOCK); - bn = (mp->iomap_bn >> (inode->i_blkbits - BBSHIFT)) + + bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + ((offset - iomap_offset) >> inode->i_blkbits); ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); @@ -587,8 +589,8 @@ xfs_map_at_offset( xfs_iomap_t *iomapp, xfs_off_t offset) { - ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); - ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); + ASSERT(iomapp->iomap_bn != HOLESTARTBLOCK); + ASSERT(iomapp->iomap_bn != DELAYSTARTBLOCK); lock_buffer(bh); xfs_map_buffer(inode, bh, iomapp, offset); @@ -818,8 +820,8 @@ xfs_convert_page( continue; } - ASSERT(!(mp->iomap_flags & IOMAP_HOLE)); - ASSERT(!(mp->iomap_flags & IOMAP_DELAY)); + ASSERT(mp->iomap_bn != HOLESTARTBLOCK); + ASSERT(mp->iomap_bn != DELAYSTARTBLOCK); xfs_map_at_offset(inode, bh, mp, offset); if (startio) { @@ -1478,7 +1480,8 @@ __xfs_get_blocks( if (niomap == 0) return 0; - if (iomap.iomap_bn != IOMAP_DADDR_NULL) { + if (iomap.iomap_bn != HOLESTARTBLOCK && + iomap.iomap_bn != DELAYSTARTBLOCK) { /* * For unwritten extents do not report a disk address on * the read case (treat as if we're reading into a hole). @@ -1513,7 +1516,7 @@ __xfs_get_blocks( (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN)))) set_buffer_new(bh_result); - if (iomap.iomap_flags & IOMAP_DELAY) { + if (iomap.iomap_bn == DELAYSTARTBLOCK) { BUG_ON(direct); if (create) { set_buffer_uptodate(bh_result); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 49b5ad22a9d8..fbe5d32f9ef5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -64,24 +64,15 @@ xfs_imap_to_bmap( int imaps, /* Number of imap entries */ int flags) { - xfs_fsblock_t start_block; - iomapp->iomap_offset = imap->br_startoff; iomapp->iomap_bsize = imap->br_blockcount; iomapp->iomap_flags = flags; + iomapp->iomap_bn = imap->br_startblock; - start_block = imap->br_startblock; - if (start_block == HOLESTARTBLOCK) { - iomapp->iomap_bn = IOMAP_DADDR_NULL; - iomapp->iomap_flags |= IOMAP_HOLE; - } else if (start_block == DELAYSTARTBLOCK) { - iomapp->iomap_bn = IOMAP_DADDR_NULL; - iomapp->iomap_flags |= IOMAP_DELAY; - } else { - iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block); - if (ISUNWRITTEN(imap)) - iomapp->iomap_flags |= IOMAP_UNWRITTEN; - } + if (imap->br_startblock != HOLESTARTBLOCK && + imap->br_startblock != DELAYSTARTBLOCK && + ISUNWRITTEN(imap)) + iomapp->iomap_flags |= IOMAP_UNWRITTEN; } int diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index db9299631ee4..d2f3b67d39f9 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -18,12 +18,8 @@ #ifndef __XFS_IOMAP_H__ #define __XFS_IOMAP_H__ -#define IOMAP_DADDR_NULL ((xfs_daddr_t) (-1LL)) - - typedef enum { /* iomap_flags values */ IOMAP_READ = 0, /* mapping for a read */ - IOMAP_HOLE = 0x02, /* mapping covers a hole */ IOMAP_DELAY = 0x04, /* mapping covers delalloc region */ IOMAP_UNWRITTEN = 0x20, /* mapping covers allocated */ /* but uninitialized file data */ -- cgit v1.2.3 From 207d041602cead1c1a16288f6225aea9da1f5bc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:56 +0000 Subject: xfs: kill struct xfs_iomap Now that struct xfs_iomap contains exactly the same units as struct xfs_bmbt_irec we can just use the latter directly in the aops code. Replace the missing IOMAP_NEW flag with a new boolean output parameter to xfs_iomap. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 90 +++++++++++++++++++++++---------------------- fs/xfs/xfs_iomap.c | 82 ++++++++++++++--------------------------- fs/xfs/xfs_iomap.h | 26 +------------ 3 files changed, 75 insertions(+), 123 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 2b09cc34dd07..70ce1da73d01 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -310,23 +310,24 @@ xfs_map_blocks( struct inode *inode, loff_t offset, ssize_t count, - xfs_iomap_t *mapp, + struct xfs_bmbt_irec *imap, int flags) { int nmaps = 1; + int new = 0; - return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); + return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new); } STATIC int xfs_iomap_valid( struct inode *inode, - xfs_iomap_t *iomapp, + struct xfs_bmbt_irec *imap, loff_t offset) { struct xfs_mount *mp = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, iomapp->iomap_offset); - xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, iomapp->iomap_bsize); + xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); + xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); return offset >= iomap_offset && offset < iomap_offset + iomap_bsize; @@ -562,16 +563,16 @@ STATIC void xfs_map_buffer( struct inode *inode, struct buffer_head *bh, - xfs_iomap_t *mp, + struct xfs_bmbt_irec *imap, xfs_off_t offset) { sector_t bn; struct xfs_mount *m = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(m, mp->iomap_offset); - xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), mp->iomap_bn); + xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); + xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); - ASSERT(mp->iomap_bn != HOLESTARTBLOCK); - ASSERT(mp->iomap_bn != DELAYSTARTBLOCK); + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + ((offset - iomap_offset) >> inode->i_blkbits); @@ -586,14 +587,14 @@ STATIC void xfs_map_at_offset( struct inode *inode, struct buffer_head *bh, - xfs_iomap_t *iomapp, + struct xfs_bmbt_irec *imap, xfs_off_t offset) { - ASSERT(iomapp->iomap_bn != HOLESTARTBLOCK); - ASSERT(iomapp->iomap_bn != DELAYSTARTBLOCK); + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); lock_buffer(bh); - xfs_map_buffer(inode, bh, iomapp, offset); + xfs_map_buffer(inode, bh, imap, offset); bh->b_bdev = xfs_find_bdev_for_inode(inode); set_buffer_mapped(bh); clear_buffer_delay(bh); @@ -750,7 +751,7 @@ xfs_convert_page( struct inode *inode, struct page *page, loff_t tindex, - xfs_iomap_t *mp, + struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, int startio, @@ -815,15 +816,15 @@ xfs_convert_page( else type = IOMAP_DELAY; - if (!xfs_iomap_valid(inode, mp, offset)) { + if (!xfs_iomap_valid(inode, imap, offset)) { done = 1; continue; } - ASSERT(mp->iomap_bn != HOLESTARTBLOCK); - ASSERT(mp->iomap_bn != DELAYSTARTBLOCK); + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - xfs_map_at_offset(inode, bh, mp, offset); + xfs_map_at_offset(inode, bh, imap, offset); if (startio) { xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); @@ -875,7 +876,7 @@ STATIC void xfs_cluster_write( struct inode *inode, pgoff_t tindex, - xfs_iomap_t *iomapp, + struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, int startio, @@ -894,7 +895,7 @@ xfs_cluster_write( for (i = 0; i < pagevec_count(&pvec); i++) { done = xfs_convert_page(inode, pvec.pages[i], tindex++, - iomapp, ioendp, wbc, startio, all_bh); + imap, ioendp, wbc, startio, all_bh); if (done) break; } @@ -1051,7 +1052,7 @@ xfs_page_state_convert( int unmapped) /* also implies page uptodate */ { struct buffer_head *bh, *head; - xfs_iomap_t iomap; + struct xfs_bmbt_irec imap; xfs_ioend_t *ioend = NULL, *iohead = NULL; loff_t offset; unsigned long p_offset = 0; @@ -1125,7 +1126,7 @@ xfs_page_state_convert( } if (iomap_valid) - iomap_valid = xfs_iomap_valid(inode, &iomap, offset); + iomap_valid = xfs_iomap_valid(inode, &imap, offset); /* * First case, map an unwritten extent and prepare for @@ -1177,13 +1178,13 @@ xfs_page_state_convert( } err = xfs_map_blocks(inode, offset, size, - &iomap, flags); + &imap, flags); if (err) goto error; - iomap_valid = xfs_iomap_valid(inode, &iomap, offset); + iomap_valid = xfs_iomap_valid(inode, &imap, offset); } if (iomap_valid) { - xfs_map_at_offset(inode, bh, &iomap, offset); + xfs_map_at_offset(inode, bh, &imap, offset); if (startio) { xfs_add_to_ioend(inode, bh, offset, type, &ioend, @@ -1207,10 +1208,10 @@ xfs_page_state_convert( size = xfs_probe_cluster(inode, page, bh, head, 1); err = xfs_map_blocks(inode, offset, size, - &iomap, flags); + &imap, flags); if (err) goto error; - iomap_valid = xfs_iomap_valid(inode, &iomap, offset); + iomap_valid = xfs_iomap_valid(inode, &imap, offset); } /* @@ -1251,13 +1252,13 @@ xfs_page_state_convert( if (ioend && iomap_valid) { struct xfs_mount *m = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(m, iomap.iomap_offset); - xfs_off_t iomap_bsize = XFS_FSB_TO_B(m, iomap.iomap_bsize); + xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap.br_startoff); + xfs_off_t iomap_bsize = XFS_FSB_TO_B(m, imap.br_blockcount); offset = (iomap_offset + iomap_bsize - 1) >> PAGE_CACHE_SHIFT; tlast = min_t(pgoff_t, offset, last_index); - xfs_cluster_write(inode, page->index + 1, &iomap, &ioend, + xfs_cluster_write(inode, page->index + 1, &imap, &ioend, wbc, startio, all_bh, tlast); } @@ -1460,10 +1461,11 @@ __xfs_get_blocks( int direct, bmapi_flags_t flags) { - xfs_iomap_t iomap; + struct xfs_bmbt_irec imap; xfs_off_t offset; ssize_t size; - int niomap = 1; + int nimap = 1; + int new = 0; int error; offset = (xfs_off_t)iblock << inode->i_blkbits; @@ -1474,21 +1476,21 @@ __xfs_get_blocks( return 0; error = xfs_iomap(XFS_I(inode), offset, size, - create ? flags : BMAPI_READ, &iomap, &niomap); + create ? flags : BMAPI_READ, &imap, &nimap, &new); if (error) return -error; - if (niomap == 0) + if (nimap == 0) return 0; - if (iomap.iomap_bn != HOLESTARTBLOCK && - iomap.iomap_bn != DELAYSTARTBLOCK) { + if (imap.br_startblock != HOLESTARTBLOCK && + imap.br_startblock != DELAYSTARTBLOCK) { /* * For unwritten extents do not report a disk address on * the read case (treat as if we're reading into a hole). */ - if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) - xfs_map_buffer(inode, bh_result, &iomap, offset); - if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { + if (create || !ISUNWRITTEN(&imap)) + xfs_map_buffer(inode, bh_result, &imap, offset); + if (create && ISUNWRITTEN(&imap)) { if (direct) bh_result->b_private = inode; set_buffer_unwritten(bh_result); @@ -1513,10 +1515,10 @@ __xfs_get_blocks( if (create && ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || (offset >= i_size_read(inode)) || - (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN)))) + (new || ISUNWRITTEN(&imap)))) set_buffer_new(bh_result); - if (iomap.iomap_bn == DELAYSTARTBLOCK) { + if (imap.br_startblock == DELAYSTARTBLOCK) { BUG_ON(direct); if (create) { set_buffer_uptodate(bh_result); @@ -1527,9 +1529,9 @@ __xfs_get_blocks( if (direct || size > (1 << inode->i_blkbits)) { struct xfs_mount *mp = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, iomap.iomap_offset); + xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, imap.br_startoff); xfs_off_t iomap_delta = offset - iomap_offset; - xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, iomap.iomap_bsize); + xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, imap.br_blockcount); ASSERT(iomap_bsize - iomap_delta > 0); offset = min_t(xfs_off_t, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fbe5d32f9ef5..7545dcdaa8aa 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -55,46 +55,25 @@ #define XFS_STRAT_WRITE_IMAPS 2 #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP -STATIC void -xfs_imap_to_bmap( - xfs_inode_t *ip, - xfs_off_t offset, - xfs_bmbt_irec_t *imap, - xfs_iomap_t *iomapp, - int imaps, /* Number of imap entries */ - int flags) -{ - iomapp->iomap_offset = imap->br_startoff; - iomapp->iomap_bsize = imap->br_blockcount; - iomapp->iomap_flags = flags; - iomapp->iomap_bn = imap->br_startblock; - - if (imap->br_startblock != HOLESTARTBLOCK && - imap->br_startblock != DELAYSTARTBLOCK && - ISUNWRITTEN(imap)) - iomapp->iomap_flags |= IOMAP_UNWRITTEN; -} - int xfs_iomap( - xfs_inode_t *ip, - xfs_off_t offset, - ssize_t count, - int flags, - xfs_iomap_t *iomapp, - int *niomaps) + struct xfs_inode *ip, + xfs_off_t offset, + ssize_t count, + int flags, + struct xfs_bmbt_irec *imap, + int *nimaps, + int *new) { - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int lockmode = 0; - xfs_bmbt_irec_t imap; - int nimaps = 1; - int bmapi_flags = 0; - int iomap_flags = 0; + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int lockmode = 0; + int bmapi_flags = 0; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); - ASSERT(niomaps && *niomaps == 1); + + *new = 0; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -136,8 +115,8 @@ xfs_iomap( error = xfs_bmapi(NULL, ip, offset_fsb, (xfs_filblks_t)(end_fsb - offset_fsb), - bmapi_flags, NULL, 0, &imap, - &nimaps, NULL, NULL); + bmapi_flags, NULL, 0, imap, + nimaps, NULL, NULL); if (error) goto out; @@ -145,45 +124,41 @@ xfs_iomap( switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { case BMAPI_WRITE: /* If we found an extent, return it */ - if (nimaps && - (imap.br_startblock != HOLESTARTBLOCK) && - (imap.br_startblock != DELAYSTARTBLOCK)) { - trace_xfs_iomap_found(ip, offset, count, flags, &imap); + if (*nimaps && + (imap->br_startblock != HOLESTARTBLOCK) && + (imap->br_startblock != DELAYSTARTBLOCK)) { + trace_xfs_iomap_found(ip, offset, count, flags, imap); break; } if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { error = xfs_iomap_write_direct(ip, offset, count, flags, - &imap, &nimaps, nimaps); + imap, nimaps, *nimaps); } else { error = xfs_iomap_write_delay(ip, offset, count, flags, - &imap, &nimaps); + imap, nimaps); } if (!error) { - trace_xfs_iomap_alloc(ip, offset, count, flags, &imap); + trace_xfs_iomap_alloc(ip, offset, count, flags, imap); } - iomap_flags = IOMAP_NEW; + *new = 1; break; case BMAPI_ALLOCATE: /* If we found an extent, return it */ xfs_iunlock(ip, lockmode); lockmode = 0; - if (nimaps && !isnullstartblock(imap.br_startblock)) { - trace_xfs_iomap_found(ip, offset, count, flags, &imap); + if (*nimaps && !isnullstartblock(imap->br_startblock)) { + trace_xfs_iomap_found(ip, offset, count, flags, imap); break; } error = xfs_iomap_write_allocate(ip, offset, count, - &imap, &nimaps); + imap, nimaps); break; } - ASSERT(nimaps <= 1); - - if (nimaps) - xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, iomap_flags); - *niomaps = nimaps; + ASSERT(*nimaps <= 1); out: if (lockmode) @@ -191,7 +166,6 @@ out: return XFS_ERROR(error); } - STATIC int xfs_iomap_eof_align_last_fsb( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index d2f3b67d39f9..ba49a4fd1b3f 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -47,35 +47,11 @@ typedef enum { { BMAPI_MMAP, "MMAP" }, \ { BMAPI_TRYLOCK, "TRYLOCK" } -/* - * xfs_iomap_t: File system I/O map - * - * The iomap_bn field is expressed in 512-byte blocks, and is where the - * mapping starts on disk. - * - * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes. - * iomap_offset is the offset of the mapping in the file itself. - * iomap_bsize is the size of the mapping, iomap_delta is the - * desired data's offset into the mapping, given the offset supplied - * to the file I/O map routine. - * - * When a request is made to read beyond the logical end of the object, - * iomap_size may be set to 0, but iomap_offset and iomap_length should be set - * to the actual amount of underlying storage that has been allocated, if any. - */ - -typedef struct xfs_iomap { - xfs_daddr_t iomap_bn; /* first 512B blk of mapping */ - xfs_off_t iomap_offset; /* offset of mapping, bytes */ - xfs_off_t iomap_bsize; /* size of mapping, bytes */ - iomap_flags_t iomap_flags; -} xfs_iomap_t; - struct xfs_inode; struct xfs_bmbt_irec; extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, - struct xfs_iomap *, int *); + struct xfs_bmbt_irec *, int *, int *); extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, int, struct xfs_bmbt_irec *, int *, int); extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int, -- cgit v1.2.3 From 34a52c6c064fb9f1fd1310407ce076a4bb049734 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:57 +0000 Subject: xfs: move I/O type flags into xfs_aops.c The IOMAP_ flags are now only used inside xfs_aops.c for extent probing and I/O completion tracking, so more them here, and rename them to IO_* as there's no mapping involved at all. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 51 ++++++++++++++++++++++++++------------------- fs/xfs/xfs_iomap.h | 8 ------- 2 files changed, 30 insertions(+), 29 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 70ce1da73d01..f1dd70e201cf 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -45,6 +45,15 @@ #include #include +/* + * Types of I/O for bmap clustering and I/O completion tracking. + */ +enum { + IO_READ, /* mapping for a read */ + IO_DELAY, /* mapping covers delalloc region */ + IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ + IO_NEW /* just allocated */ +}; /* * Prime number of hash buckets since address is used as the key. @@ -184,7 +193,7 @@ xfs_setfilesize( xfs_fsize_t isize; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); - ASSERT(ioend->io_type != IOMAP_READ); + ASSERT(ioend->io_type != IO_READ); if (unlikely(ioend->io_error)) return 0; @@ -215,7 +224,7 @@ xfs_finish_ioend( if (atomic_dec_and_test(&ioend->io_remaining)) { struct workqueue_struct *wq; - wq = (ioend->io_type == IOMAP_UNWRITTEN) ? + wq = (ioend->io_type == IO_UNWRITTEN) ? xfsconvertd_workqueue : xfsdatad_workqueue; queue_work(wq, &ioend->io_work); if (wait) @@ -238,7 +247,7 @@ xfs_end_io( * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. */ - if (ioend->io_type == IOMAP_UNWRITTEN && + if (ioend->io_type == IO_UNWRITTEN && likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { error = xfs_iomap_write_unwritten(ip, ioend->io_offset, @@ -251,7 +260,7 @@ xfs_end_io( * We might have to update the on-disk file size after extending * writes. */ - if (ioend->io_type != IOMAP_READ) { + if (ioend->io_type != IO_READ) { error = xfs_setfilesize(ioend); ASSERT(!error || error == EAGAIN); } @@ -724,11 +733,11 @@ xfs_is_delayed_page( bh = head = page_buffers(page); do { if (buffer_unwritten(bh)) - acceptable = (type == IOMAP_UNWRITTEN); + acceptable = (type == IO_UNWRITTEN); else if (buffer_delay(bh)) - acceptable = (type == IOMAP_DELAY); + acceptable = (type == IO_DELAY); else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable = (type == IOMAP_NEW); + acceptable = (type == IO_NEW); else break; } while ((bh = bh->b_this_page) != head); @@ -812,9 +821,9 @@ xfs_convert_page( if (buffer_unwritten(bh) || buffer_delay(bh)) { if (buffer_unwritten(bh)) - type = IOMAP_UNWRITTEN; + type = IO_UNWRITTEN; else - type = IOMAP_DELAY; + type = IO_DELAY; if (!xfs_iomap_valid(inode, imap, offset)) { done = 1; @@ -836,7 +845,7 @@ xfs_convert_page( page_dirty--; count++; } else { - type = IOMAP_NEW; + type = IO_NEW; if (buffer_mapped(bh) && all_bh && startio) { lock_buffer(bh); xfs_add_to_ioend(inode, bh, offset, @@ -940,7 +949,7 @@ xfs_aops_discard_page( loff_t offset = page_offset(page); ssize_t len = 1 << inode->i_blkbits; - if (!xfs_is_delayed_page(page, IOMAP_DELAY)) + if (!xfs_is_delayed_page(page, IO_DELAY)) goto out_invalidate; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -1107,7 +1116,7 @@ xfs_page_state_convert( bh = head = page_buffers(page); offset = page_offset(page); flags = BMAPI_READ; - type = IOMAP_NEW; + type = IO_NEW; /* TODO: cleanup count and page_dirty */ @@ -1150,13 +1159,13 @@ xfs_page_state_convert( iomap_valid = 0; if (buffer_unwritten(bh)) { - type = IOMAP_UNWRITTEN; + type = IO_UNWRITTEN; flags = BMAPI_WRITE | BMAPI_IGNSTATE; } else if (buffer_delay(bh)) { - type = IOMAP_DELAY; + type = IO_DELAY; flags = BMAPI_ALLOCATE | trylock; } else { - type = IOMAP_NEW; + type = IO_NEW; flags = BMAPI_WRITE | BMAPI_MMAP; } @@ -1170,7 +1179,7 @@ xfs_page_state_convert( * for unwritten extent conversion. */ new_ioend = 1; - if (type == IOMAP_NEW) { + if (type == IO_NEW) { size = xfs_probe_cluster(inode, page, bh, head, 0); } else { @@ -1215,14 +1224,14 @@ xfs_page_state_convert( } /* - * We set the type to IOMAP_NEW in case we are doing a + * We set the type to IO_NEW in case we are doing a * small write at EOF that is extending the file but * without needing an allocation. We need to update the * file size on I/O completion in this case so it is * the same case as having just allocated a new extent * that we are writing into for the first time. */ - type = IOMAP_NEW; + type = IO_NEW; if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) @@ -1594,7 +1603,7 @@ xfs_end_io_direct( */ ioend->io_offset = offset; ioend->io_size = size; - if (ioend->io_type == IOMAP_READ) { + if (ioend->io_type == IO_READ) { xfs_finish_ioend(ioend, 0); } else if (private && size > 0) { xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); @@ -1605,7 +1614,7 @@ xfs_end_io_direct( * didn't map an unwritten extent so switch it's completion * handler. */ - ioend->io_type = IOMAP_NEW; + ioend->io_type = IO_NEW; xfs_finish_ioend(ioend, 0); } @@ -1633,7 +1642,7 @@ xfs_vm_direct_IO( bdev = xfs_find_bdev_for_inode(inode); iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? - IOMAP_UNWRITTEN : IOMAP_READ); + IO_UNWRITTEN : IO_READ); ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, offset, nr_segs, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index ba49a4fd1b3f..41e32d20a405 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -18,14 +18,6 @@ #ifndef __XFS_IOMAP_H__ #define __XFS_IOMAP_H__ -typedef enum { /* iomap_flags values */ - IOMAP_READ = 0, /* mapping for a read */ - IOMAP_DELAY = 0x04, /* mapping covers delalloc region */ - IOMAP_UNWRITTEN = 0x20, /* mapping covers allocated */ - /* but uninitialized file data */ - IOMAP_NEW = 0x40 /* just allocate */ -} iomap_flags_t; - typedef enum { /* base extent manipulation calls */ BMAPI_READ = (1 << 0), /* read extents */ -- cgit v1.2.3 From 558e6891693f4c383c51c7343a88dea174eadacf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:58 +0000 Subject: xfs: clean up xfs_iomap_valid Rename all iomap_valid identifiers to imap_valid to fit the new world order, and clean up xfs_iomap_valid to convert the passed in offset to blocks instead of the imap values to bytes. Use the simpler inode->i_blkbits instead of the XFS macros for this. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f1dd70e201cf..6feecd279470 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -329,17 +329,15 @@ xfs_map_blocks( } STATIC int -xfs_iomap_valid( +xfs_imap_valid( struct inode *inode, struct xfs_bmbt_irec *imap, - loff_t offset) + xfs_off_t offset) { - struct xfs_mount *mp = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); - xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); + offset >>= inode->i_blkbits; - return offset >= iomap_offset && - offset < iomap_offset + iomap_bsize; + return offset >= imap->br_startoff && + offset < imap->br_startoff + imap->br_blockcount; } /* @@ -825,7 +823,7 @@ xfs_convert_page( else type = IO_DELAY; - if (!xfs_iomap_valid(inode, imap, offset)) { + if (!xfs_imap_valid(inode, imap, offset)) { done = 1; continue; } @@ -1069,7 +1067,7 @@ xfs_page_state_convert( __uint64_t end_offset; pgoff_t end_index, last_index, tlast; ssize_t size, len; - int flags, err, iomap_valid = 0, uptodate = 1; + int flags, err, imap_valid = 0, uptodate = 1; int page_dirty, count = 0; int trylock = 0; int all_bh = unmapped; @@ -1130,12 +1128,12 @@ xfs_page_state_convert( * the iomap is actually still valid, but the ioend * isn't. shouldn't happen too often. */ - iomap_valid = 0; + imap_valid = 0; continue; } - if (iomap_valid) - iomap_valid = xfs_iomap_valid(inode, &imap, offset); + if (imap_valid) + imap_valid = xfs_imap_valid(inode, &imap, offset); /* * First case, map an unwritten extent and prepare for @@ -1156,7 +1154,7 @@ xfs_page_state_convert( * Make sure we don't use a read-only iomap */ if (flags == BMAPI_READ) - iomap_valid = 0; + imap_valid = 0; if (buffer_unwritten(bh)) { type = IO_UNWRITTEN; @@ -1169,7 +1167,7 @@ xfs_page_state_convert( flags = BMAPI_WRITE | BMAPI_MMAP; } - if (!iomap_valid) { + if (!imap_valid) { /* * if we didn't have a valid mapping then we * need to ensure that we put the new mapping @@ -1190,9 +1188,10 @@ xfs_page_state_convert( &imap, flags); if (err) goto error; - iomap_valid = xfs_iomap_valid(inode, &imap, offset); + imap_valid = xfs_imap_valid(inode, &imap, + offset); } - if (iomap_valid) { + if (imap_valid) { xfs_map_at_offset(inode, bh, &imap, offset); if (startio) { xfs_add_to_ioend(inode, bh, offset, @@ -1212,7 +1211,7 @@ xfs_page_state_convert( * That means it must already have extents allocated * underneath it. Map the extent by reading it. */ - if (!iomap_valid || flags != BMAPI_READ) { + if (!imap_valid || flags != BMAPI_READ) { flags = BMAPI_READ; size = xfs_probe_cluster(inode, page, bh, head, 1); @@ -1220,7 +1219,8 @@ xfs_page_state_convert( &imap, flags); if (err) goto error; - iomap_valid = xfs_iomap_valid(inode, &imap, offset); + imap_valid = xfs_imap_valid(inode, &imap, + offset); } /* @@ -1234,18 +1234,18 @@ xfs_page_state_convert( type = IO_NEW; if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); - if (iomap_valid) + if (imap_valid) all_bh = 1; xfs_add_to_ioend(inode, bh, offset, type, - &ioend, !iomap_valid); + &ioend, !imap_valid); page_dirty--; count++; } else { - iomap_valid = 0; + imap_valid = 0; } } else if ((buffer_uptodate(bh) || PageUptodate(page)) && (unmapped || startio)) { - iomap_valid = 0; + imap_valid = 0; } if (!iohead) @@ -1259,7 +1259,7 @@ xfs_page_state_convert( if (startio) xfs_start_page_writeback(page, 1, count); - if (ioend && iomap_valid) { + if (ioend && imap_valid) { struct xfs_mount *m = XFS_I(inode)->i_mount; xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap.br_startoff); xfs_off_t iomap_bsize = XFS_FSB_TO_B(m, imap.br_blockcount); -- cgit v1.2.3 From 2b8f12b7e438fa6ba4a0f8f861871be0beb3a3e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:28:59 +0000 Subject: xfs: clean up mapping size calculation in __xfs_get_blocks Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 6feecd279470..1d51bdde5748 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1536,16 +1536,23 @@ __xfs_get_blocks( } } + /* + * If this is O_DIRECT or the mpage code calling tell them how large + * the mapping is, so that we can avoid repeated get_blocks calls. + */ if (direct || size > (1 << inode->i_blkbits)) { - struct xfs_mount *mp = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(mp, imap.br_startoff); - xfs_off_t iomap_delta = offset - iomap_offset; - xfs_off_t iomap_bsize = XFS_FSB_TO_B(mp, imap.br_blockcount); - - ASSERT(iomap_bsize - iomap_delta > 0); - offset = min_t(xfs_off_t, - iomap_bsize - iomap_delta, size); - bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); + xfs_off_t mapping_size; + + mapping_size = imap.br_startoff + imap.br_blockcount - iblock; + mapping_size <<= inode->i_blkbits; + + ASSERT(mapping_size > 0); + if (mapping_size > size) + mapping_size = size; + if (mapping_size > LONG_MAX) + mapping_size = LONG_MAX; + + bh_result->b_size = mapping_size; } return 0; -- cgit v1.2.3 From bd1556a146d46070049428dded306829cb65161d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Apr 2010 12:29:00 +0000 Subject: xfs: clean up end index calculation in xfs_page_state_convert Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_aops.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 1d51bdde5748..089eaca860b4 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1065,7 +1065,7 @@ xfs_page_state_convert( unsigned long p_offset = 0; unsigned int type; __uint64_t end_offset; - pgoff_t end_index, last_index, tlast; + pgoff_t end_index, last_index; ssize_t size, len; int flags, err, imap_valid = 0, uptodate = 1; int page_dirty, count = 0; @@ -1260,15 +1260,22 @@ xfs_page_state_convert( xfs_start_page_writeback(page, 1, count); if (ioend && imap_valid) { - struct xfs_mount *m = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap.br_startoff); - xfs_off_t iomap_bsize = XFS_FSB_TO_B(m, imap.br_blockcount); + xfs_off_t end_index; + + end_index = imap.br_startoff + imap.br_blockcount; + + /* to bytes */ + end_index <<= inode->i_blkbits; + + /* to pages */ + end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; + + /* check against file size */ + if (end_index > last_index) + end_index = last_index; - offset = (iomap_offset + iomap_bsize - 1) >> - PAGE_CACHE_SHIFT; - tlast = min_t(pgoff_t, offset, last_index); xfs_cluster_write(inode, page->index + 1, &imap, &ioend, - wbc, startio, all_bh, tlast); + wbc, startio, all_bh, end_index); } if (iohead) -- cgit v1.2.3 From b9b2dd36c1bc64430f8e13990ab135cbecc10076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 May 2010 17:04:58 -0400 Subject: quota: unify ->get_dqblk Pass the larger struct fs_disk_quota to the ->get_dqblk operation so that the Q_GETQUOTA and Q_XGETQUOTA operations can be implemented with a single filesystem operation and we can retire the ->get_xquota operation. The additional information (RT-subvolume accounting and warn counts) are left zero for the VFS quota implementation. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/gfs2/quota.c | 6 +++--- fs/quota/dquot.c | 27 ++++++++++++++++----------- fs/quota/quota.c | 23 ++++++++++++++++++----- fs/xfs/linux-2.6/xfs_quotaops.c | 4 ++-- include/linux/quota.h | 3 +-- include/linux/quotaops.h | 3 ++- 6 files changed, 42 insertions(+), 24 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index d5f4661287f9..dec93577a783 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1476,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb, return 0; } -static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, - struct fs_disk_quota *fdq) +static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_lvb *qlvb; @@ -1629,7 +1629,7 @@ out_put: const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, - .get_xquota = gfs2_xquota_get, + .get_dqblk = gfs2_get_dqblk, .set_xquota = gfs2_xquota_set, }; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 01347e81d0ca..6aed77fc99c7 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2301,25 +2301,30 @@ static inline qsize_t stoqb(qsize_t space) } /* Generic routine for getting common part of quota structure */ -static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) +static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) { struct mem_dqblk *dm = &dquot->dq_dqb; + memset(di, 0, sizeof(*di)); + di->d_version = FS_DQUOT_VERSION; + di->d_flags = dquot->dq_type == USRQUOTA ? + XFS_USER_QUOTA : XFS_GROUP_QUOTA; + di->d_id = dquot->dq_id; + spin_lock(&dq_data_lock); - di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit); - di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit); - di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace; - di->dqb_ihardlimit = dm->dqb_ihardlimit; - di->dqb_isoftlimit = dm->dqb_isoftlimit; - di->dqb_curinodes = dm->dqb_curinodes; - di->dqb_btime = dm->dqb_btime; - di->dqb_itime = dm->dqb_itime; - di->dqb_valid = QIF_ALL; + di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); + di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); + di->d_ino_hardlimit = dm->dqb_ihardlimit; + di->d_ino_softlimit = dm->dqb_isoftlimit; + di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace; + di->d_icount = dm->dqb_curinodes; + di->d_btimer = dm->dqb_btime; + di->d_itimer = dm->dqb_itime; spin_unlock(&dq_data_lock); } int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, - struct if_dqblk *di) + struct fs_disk_quota *di) { struct dquot *dquot; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95388f9b7356..8680e257c2bd 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -136,19 +136,32 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) return sb->s_qcop->set_info(sb, type, &info); } +static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) +{ + dst->dqb_bhardlimit = src->d_blk_hardlimit; + dst->dqb_bsoftlimit = src->d_blk_softlimit; + dst->dqb_curspace = src->d_bcount; + dst->dqb_ihardlimit = src->d_ino_hardlimit; + dst->dqb_isoftlimit = src->d_ino_softlimit; + dst->dqb_curinodes = src->d_icount; + dst->dqb_btime = src->d_btimer; + dst->dqb_itime = src->d_itimer; + dst->dqb_valid = QIF_ALL; +} + static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { + struct fs_disk_quota fdq; struct if_dqblk idq; int ret; - if (!sb_has_quota_active(sb, type)) - return -ESRCH; if (!sb->s_qcop->get_dqblk) return -ENOSYS; - ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); + ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); if (ret) return ret; + copy_to_if_dqblk(&idq, &fdq); if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; return 0; @@ -210,9 +223,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, struct fs_disk_quota fdq; int ret; - if (!sb->s_qcop->get_xquota) + if (!sb->s_qcop->get_dqblk) return -ENOSYS; - ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); + ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 1947514ce1ad..3d473f43c9a9 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -97,7 +97,7 @@ xfs_fs_set_xstate( } STATIC int -xfs_fs_get_xquota( +xfs_fs_get_dqblk( struct super_block *sb, int type, qid_t id, @@ -135,6 +135,6 @@ xfs_fs_set_xquota( const struct quotactl_ops xfs_quotactl_operations = { .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, - .get_xquota = xfs_fs_get_xquota, + .get_dqblk = xfs_fs_get_dqblk, .set_xquota = xfs_fs_set_xquota, }; diff --git a/include/linux/quota.h b/include/linux/quota.h index cdfde10481b7..42364219dc9b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -337,11 +337,10 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); + int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); - int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); }; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e6fa7acce290..d32a48631b0d 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -63,7 +63,8 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags); int vfs_quota_sync(struct super_block *sb, int type, int wait); int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *di); int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); int dquot_transfer(struct inode *inode, struct iattr *iattr); -- cgit v1.2.3 From c472b43275976512e4c1c32da5ced03f339cb380 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 May 2010 17:05:17 -0400 Subject: quota: unify ->set_dqblk Pass the larger struct fs_disk_quota to the ->set_dqblk operation so that the Q_SETQUOTA and Q_XSETQUOTA operations can be implemented with a single filesystem operation and we can retire the ->set_xquota operation. The additional information (RT-subvolume accounting and warn counts) are left zero for the VFS quota implementation. Add new fieldmask values for setting the numer of blocks and inodes values which is required for the VFS quota, but wasn't for XFS. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/gfs2/quota.c | 6 ++-- fs/quota/dquot.c | 67 ++++++++++++++++++++++++++--------------- fs/quota/quota.c | 36 +++++++++++++++++++--- fs/xfs/linux-2.6/xfs_quotaops.c | 4 +-- fs/xfs/quota/xfs_qm_syscalls.c | 10 ++++-- include/linux/dqblk_xfs.h | 9 ++++++ include/linux/quota.h | 3 +- include/linux/quotaops.h | 3 +- 8 files changed, 98 insertions(+), 40 deletions(-) (limited to 'fs/xfs/linux-2.6') diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index dec93577a783..49667d68769e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1521,8 +1521,8 @@ out: /* GFS2 only supports a subset of the XFS fields */ #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) -static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, - struct fs_disk_quota *fdq) +static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); @@ -1630,6 +1630,6 @@ const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, .get_dqblk = gfs2_get_dqblk, - .set_xquota = gfs2_xquota_set, + .set_dqblk = gfs2_set_dqblk, }; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6aed77fc99c7..b1a5036560a9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2338,51 +2338,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, } EXPORT_SYMBOL(vfs_get_dqblk); +#define VFS_FS_DQ_MASK \ + (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ + FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \ + FS_DQ_BTIMER | FS_DQ_ITIMER) + /* Generic routine for setting common part of quota structure */ -static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) +static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; - if ((di->dqb_valid & QIF_BLIMITS && - (di->dqb_bhardlimit > dqi->dqi_maxblimit || - di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || - (di->dqb_valid & QIF_ILIMITS && - (di->dqb_ihardlimit > dqi->dqi_maxilimit || - di->dqb_isoftlimit > dqi->dqi_maxilimit))) + if (di->d_fieldmask & ~VFS_FS_DQ_MASK) + return -EINVAL; + + if (((di->d_fieldmask & FS_DQ_BSOFT) && + (di->d_blk_softlimit > dqi->dqi_maxblimit)) || + ((di->d_fieldmask & FS_DQ_BHARD) && + (di->d_blk_hardlimit > dqi->dqi_maxblimit)) || + ((di->d_fieldmask & FS_DQ_ISOFT) && + (di->d_ino_softlimit > dqi->dqi_maxilimit)) || + ((di->d_fieldmask & FS_DQ_IHARD) && + (di->d_ino_hardlimit > dqi->dqi_maxilimit))) return -ERANGE; spin_lock(&dq_data_lock); - if (di->dqb_valid & QIF_SPACE) { - dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace; + if (di->d_fieldmask & FS_DQ_BCOUNT) { + dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_BLIMITS) { - dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); - dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); + + if (di->d_fieldmask & FS_DQ_BSOFT) + dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit); + if (di->d_fieldmask & FS_DQ_BHARD) + dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit); + if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_INODES) { - dm->dqb_curinodes = di->dqb_curinodes; + + if (di->d_fieldmask & FS_DQ_ICOUNT) { + dm->dqb_curinodes = di->d_icount; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_ILIMITS) { - dm->dqb_isoftlimit = di->dqb_isoftlimit; - dm->dqb_ihardlimit = di->dqb_ihardlimit; + + if (di->d_fieldmask & FS_DQ_ISOFT) + dm->dqb_isoftlimit = di->d_ino_softlimit; + if (di->d_fieldmask & FS_DQ_IHARD) + dm->dqb_ihardlimit = di->d_ino_hardlimit; + if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_BTIME) { - dm->dqb_btime = di->dqb_btime; + + if (di->d_fieldmask & FS_DQ_BTIMER) { + dm->dqb_btime = di->d_btimer; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_ITIME) { - dm->dqb_itime = di->dqb_itime; + + if (di->d_fieldmask & FS_DQ_ITIMER) { + dm->dqb_itime = di->d_itimer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } @@ -2392,7 +2411,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) dm->dqb_curspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); - } else if (!(di->dqb_valid & QIF_BTIME)) + } else if (!(di->d_fieldmask & FS_DQ_BTIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; } @@ -2401,7 +2420,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) dm->dqb_curinodes < dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); - } else if (!(di->dqb_valid & QIF_ITIME)) + } else if (!(di->d_fieldmask & FS_DQ_ITIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = get_seconds() + dqi->dqi_igrace; } @@ -2417,7 +2436,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) } int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, - struct if_dqblk *di) + struct fs_disk_quota *di) { struct dquot *dquot; int rc; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 8680e257c2bd..d6ee49dda4fd 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -167,18 +167,44 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, return 0; } +static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) +{ + dst->d_blk_hardlimit = src->dqb_bhardlimit; + dst->d_blk_softlimit = src->dqb_bsoftlimit; + dst->d_bcount = src->dqb_curspace; + dst->d_ino_hardlimit = src->dqb_ihardlimit; + dst->d_ino_softlimit = src->dqb_isoftlimit; + dst->d_icount = src->dqb_curinodes; + dst->d_btimer = src->dqb_btime; + dst->d_itimer = src->dqb_itime; + + dst->d_fieldmask = 0; + if (src->dqb_valid & QIF_BLIMITS) + dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; + if (src->dqb_valid & QIF_SPACE) + dst->d_fieldmask |= FS_DQ_BCOUNT; + if (src->dqb_valid & QIF_ILIMITS) + dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; + if (src->dqb_valid & QIF_INODES) + dst->d_fieldmask |= FS_DQ_ICOUNT; + if (src->dqb_valid & QIF_BTIME) + dst->d_fieldmask |= FS_DQ_BTIMER; + if (src->dqb_valid & QIF_ITIME) + dst->d_fieldmask |= FS_DQ_ITIMER; +} + static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { + struct fs_disk_quota fdq; struct if_dqblk idq; if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; - if (!sb_has_quota_active(sb, type)) - return -ESRCH; if (!sb->s_qcop->set_dqblk) return -ENOSYS; - return sb->s_qcop->set_dqblk(sb, type, id, &idq); + copy_from_if_dqblk(&fdq, &idq); + return sb->s_qcop->set_dqblk(sb, type, id, &fdq); } static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) @@ -212,9 +238,9 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; - if (!sb->s_qcop->set_xquota) + if (!sb->s_qcop->set_dqblk) return -ENOSYS; - return sb->s_qcop->set_xquota(sb, type, id, &fdq); + return sb->s_qcop->set_dqblk(sb, type, id, &fdq); } static int quota_getxquota(struct super_block *sb, int type, qid_t id, diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 3d473f43c9a9..e31bf21fe5d3 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -114,7 +114,7 @@ xfs_fs_get_dqblk( } STATIC int -xfs_fs_set_xquota( +xfs_fs_set_dqblk( struct super_block *sb, int type, qid_t id, @@ -136,5 +136,5 @@ const struct quotactl_ops xfs_quotactl_operations = { .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, .get_dqblk = xfs_fs_get_dqblk, - .set_xquota = xfs_fs_set_xquota, + .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 26fa43140f2e..92b002f1805f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -448,6 +448,9 @@ xfs_qm_scall_getqstat( return 0; } +#define XFS_DQ_MASK \ + (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) + /* * Adjust quota limits, and start/stop timers accordingly. */ @@ -465,9 +468,10 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if ((newlim->d_fieldmask & - (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) - return (0); + if (newlim->d_fieldmask & ~XFS_DQ_MASK) + return EINVAL; + if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) + return 0; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, diff --git a/include/linux/dqblk_xfs.h b/include/linux/dqblk_xfs.h index 527504c11c5e..4389ae72024e 100644 --- a/include/linux/dqblk_xfs.h +++ b/include/linux/dqblk_xfs.h @@ -109,6 +109,15 @@ typedef struct fs_disk_quota { #define FS_DQ_RTBWARNS (1<<11) #define FS_DQ_WARNS_MASK (FS_DQ_BWARNS | FS_DQ_IWARNS | FS_DQ_RTBWARNS) +/* + * Accounting values. These can only be set for filesystem with + * non-transactional quotas that require quotacheck(8) in userspace. + */ +#define FS_DQ_BCOUNT (1<<12) +#define FS_DQ_ICOUNT (1<<13) +#define FS_DQ_RTBCOUNT (1<<14) +#define FS_DQ_ACCT_MASK (FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT) + /* * Various flags related to quotactl(2). Only relevant to XFS filesystems. */ diff --git a/include/linux/quota.h b/include/linux/quota.h index 42364219dc9b..7126a15467f1 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -338,10 +338,9 @@ struct quotactl_ops { int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); + int (*set_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); - int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); }; struct quota_format_type { diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d32a48631b0d..82c70c42d035 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -65,7 +65,8 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct fs_disk_quota *di); -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *di); int dquot_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); -- cgit v1.2.3