From 9bf729c0af67897ea8498ce17c29b0683f7f2028 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Thu, 29 Apr 2010 09:55:50 +1000
Subject: xfs: add a shrinker to background inode reclaim

On low memory boxes or those with highmem, the kernel can OOM before
inodes are reclaimed in the background via xfssyncd. Add a shrinker to
run inode reclaim so that inode reclaim is expedited when memory is low.

This is more complex than it needs to be because the VM folk don't
want a context added to the shrinker infrastructure. Hence we need
to add a global list of XFS mount structures so the shrinker can
traverse them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_super.c |   5 ++
 fs/xfs/linux-2.6/xfs_sync.c  | 112 ++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/linux-2.6/xfs_sync.h  |   7 ++-
 3 files changed, 116 insertions(+), 8 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b487ced..29f1edca76de 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
 
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
+	xfs_inode_shrinker_unregister(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_vnrele;
 
+	xfs_inode_shrinker_register(mp);
+
 	kfree(mtpt);
 	return 0;
 
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
 		goto out_cleanup_procfs;
 
 	vfs_initquota();
+	xfs_inode_shrinker_init();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
 {
 	vfs_exitquota();
 	unregister_filesystem(&xfs_fs_type);
+	xfs_inode_shrinker_destroy();
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd9698215759..a427c638d909 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	uint32_t		first_index;
 	int			last_error = 0;
@@ -134,7 +135,7 @@ restart:
 		if (error == EFSCORRUPTED)
 			break;
 
-	} while (1);
+	} while ((*nr_to_scan)--);
 
 	if (skipped) {
 		delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
+	int			nr;
 
+	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
 		struct xfs_perag	*pag;
 
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
 			continue;
 		}
 		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-						exclusive);
+						exclusive, &nr);
 		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
 				break;
 		}
+		if (nr <= 0)
+			break;
 	}
+	if (nr_to_scan)
+		*nr_to_scan = nr;
 	return XFS_ERROR(last_error);
 }
 
@@ -291,7 +299,7 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG, 0);
+				      XFS_ICI_NO_TAG, 0, NULL);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -310,7 +318,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG, 0);
+				     XFS_ICI_NO_TAG, 0, NULL);
 }
 
 STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
 	radix_tree_tag_set(&pag->pag_ici_root,
 			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
 			   XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable++;
 }
 
 /*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
 {
 	radix_tree_tag_clear(&pag->pag_ici_root,
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable--;
 }
 
 /*
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-					XFS_ICI_RECLAIM_TAG, 1);
+					XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
+{
+	struct xfs_mount *mp;
+	struct xfs_perag *pag;
+	xfs_agnumber_t	ag;
+	int		reclaimable = 0;
+
+	if (nr_to_scan) {
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+
+		down_read(&xfs_mount_list_lock);
+		list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+			xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+			if (nr_to_scan <= 0)
+				break;
+		}
+		up_read(&xfs_mount_list_lock);
+	}
+
+	down_read(&xfs_mount_list_lock);
+	list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+			pag = xfs_perag_get(mp, ag);
+			if (!pag->pag_ici_init) {
+				xfs_perag_put(pag);
+				continue;
+			}
+			reclaimable += pag->pag_ici_reclaimable;
+			xfs_perag_put(pag);
+		}
+	}
+	up_read(&xfs_mount_list_lock);
+	return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+	.shrink = xfs_reclaim_inode_shrink,
+	.seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+	init_rwsem(&xfs_mount_list_lock);
+	register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+	ASSERT(list_empty(&xfs_mount_list));
+	unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_add_tail(&mp->m_mplist, &xfs_mount_list);
+	up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_del(&mp->m_mplist);
+	up_write(&xfs_mount_list_lock);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c346cabb..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags, int tag, int write_lock);
+	int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
 #endif
-- 
cgit v1.2.3


From fda168c24586ab8e01b0eb68028d78fe3e4fb71a Mon Sep 17 00:00:00 2001
From: Zhitong Wang
Date: Tue, 23 Mar 2010 09:51:22 +1100
Subject: xfs: Fix integer overflow in fs/xfs/linux-2.6/xfs_ioctl*.c

The am_hreq.opcount field in the xfs_attrmulti_by_handle() interface
is not bounded correctly. The opcount is used to determine the size
of the buffer required. The size is bounded, but can overflow and so
the size checks may not be sufficient to catch invalid opcounts.
Fix it by catching opcount values that would cause overflows before
calculating the size.

Signed-off-by: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Reviewed-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c   | 4 ++++
 fs/xfs/linux-2.6/xfs_ioctl32.c | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 7b26cc2fd284..699b60cbab9c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -527,6 +527,10 @@ xfs_attrmulti_by_handle(
 	if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
+	/* overflow check */
+	if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+		return -E2BIG;
+
 	dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 593c05b4df8d..9287135e9bfc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle(
 			   sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
+	/* overflow check */
+	if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+		return -E2BIG;
+
 	dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
-- 
cgit v1.2.3


From e2a07812e93d4a51b1b1a6f15145a1634948db47 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt
Date: Tue, 23 Mar 2010 09:52:55 +1100
Subject: xfs: add blockdev name to kthreads

This makes it possible to see in `ps` and similar tools which kthreads
are allotted to which block device/filesystem, similar to what jbd2
does. As the process name is a fixed 16-char array, no extra
space is needed in tasks.

  PID TTY      STAT   TIME COMMAND
    2 ?        S      0:00 [kthreadd]
  197 ?        S      0:00  \_ [jbd2/sda2-8]
  198 ?        S      0:00  \_ [ext4-dio-unwrit]
  204 ?        S      0:00  \_ [flush-8:0]
 2647 ?        S      0:00  \_ [xfs_mru_cache]
 2648 ?        S      0:00  \_ [xfslogd/0]
 2649 ?        S      0:00  \_ [xfsdatad/0]
 2650 ?        S      0:00  \_ [xfsconvertd/0]
 2651 ?        S      0:00  \_ [xfsbufd/ram0]
 2652 ?        S      0:00  \_ [xfsaild/ram0]
 2653 ?        S      0:00  \_ [xfssyncd/ram0]

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Reviewed-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/linux-2.6/xfs_buf.c   | 10 ++++++----
 fs/xfs/linux-2.6/xfs_buf.h   |  2 +-
 fs/xfs/linux-2.6/xfs_super.c |  9 +++++----
 fs/xfs/linux-2.6/xfs_sync.c  |  2 +-
 4 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 44c2b0ef9a41..f7ecc44cbbd3 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1614,7 +1614,8 @@ xfs_mapping_buftarg(
 
 STATIC int
 xfs_alloc_delwrite_queue(
-	xfs_buftarg_t		*btp)
+	xfs_buftarg_t		*btp,
+	const char		*fsname)
 {
 	int	error = 0;
 
@@ -1622,7 +1623,7 @@ xfs_alloc_delwrite_queue(
 	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
 	spin_lock_init(&btp->bt_delwrite_lock);
 	btp->bt_flags = 0;
-	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
 	if (IS_ERR(btp->bt_task)) {
 		error = PTR_ERR(btp->bt_task);
 		goto out_error;
@@ -1635,7 +1636,8 @@ out_error:
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct block_device	*bdev,
-	int			external)
+	int			external,
+	const char		*fsname)
 {
 	xfs_buftarg_t		*btp;
 
@@ -1647,7 +1649,7 @@ xfs_alloc_buftarg(
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
 		goto error;
-	if (xfs_alloc_delwrite_queue(btp))
+	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
 	xfs_alloc_bufhash(btp, external);
 	return btp;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 386e7361e50e..5fbecefa5dfd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
 /*
  *	Handling of buftargs.
  */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 29f1edca76de..e8ad6dd2c10c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -789,18 +789,18 @@ xfs_open_devices(
 	 * Setup xfs_mount buffer target pointers
 	 */
 	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 
 	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 
 	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+		mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
@@ -902,7 +902,8 @@ xfsaild_start(
 	struct xfs_ail	*ailp)
 {
 	ailp->xa_target = 0;
-	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+				    ailp->xa_mount->m_fsname);
 	if (IS_ERR(ailp->xa_task))
 		return -PTR_ERR(ailp->xa_task);
 	return 0;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a427c638d909..a7ba355c21b6 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -660,7 +660,7 @@ xfs_syncd_init(
 	mp->m_sync_work.w_syncer = xfs_sync_worker;
 	mp->m_sync_work.w_mount = mp;
 	mp->m_sync_work.w_completion = NULL;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
 	if (IS_ERR(mp->m_sync_task))
 		return -PTR_ERR(mp->m_sync_task);
 	return 0;
-- 
cgit v1.2.3


From 4aaf15d1aa9673dd2cc45c48957c946cb4aa2694 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 8 Mar 2010 11:24:07 +1100
Subject: xfs: Add inode pin counts to traces

We don't record pin counts in inode events right now, and this makes
it difficult to track down problems related to pinning inodes. Add
the pin count to the inode trace class and add trace events for
pinning and unpinning inodes.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_trace.h | 9 ++++++++-
 fs/xfs/xfs_inode.c           | 2 ++
 fs/xfs/xfs_inode_item.c      | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index fcaa62f0799e..65371859c753 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -562,18 +562,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(int, count)
+		__field(int, pincount)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
 		__entry->count = atomic_read(&VFS_I(ip)->i_count);
+		__entry->pincount = atomic_read(&ip->i_pincount);
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
+	TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->count,
+		  __entry->pincount,
 		  (char *)__entry->caller_ip)
 )
 
@@ -583,6 +586,10 @@ DEFINE_EVENT(xfs_inode_class, name, \
 	TP_ARGS(ip, caller_ip))
 DEFINE_INODE_EVENT(xfs_ihold);
 DEFINE_INODE_EVENT(xfs_irele);
+DEFINE_INODE_EVENT(xfs_inode_pin);
+DEFINE_INODE_EVENT(xfs_inode_unpin);
+DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
+
 /* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
 DEFINE_INODE_EVENT(xfs_inode);
 #define xfs_itrace_entry(ip)    \
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0ffd56447045..8cd6e8d8fe9c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2449,6 +2449,8 @@ xfs_iunpin_nowait(
 {
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 
+	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
+
 	/* Give the log a push to start the unpinning I/O */
 	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 32e4188411c2..03471757bc88 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -543,6 +543,7 @@ xfs_inode_item_pin(
 {
 	ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
 
+	trace_xfs_inode_pin(iip->ili_inode, _RET_IP_);
 	atomic_inc(&iip->ili_inode->i_pincount);
 }
 
@@ -561,6 +562,7 @@ xfs_inode_item_unpin(
 {
 	struct xfs_inode	*ip = iip->ili_inode;
 
+	trace_xfs_inode_unpin(ip, _RET_IP_);
 	ASSERT(atomic_read(&ip->i_pincount) > 0);
 	if (atomic_dec_and_test(&ip->i_pincount))
 		wake_up(&ip->i_ipin_wait);
-- 
cgit v1.2.3


From 9abbc539bf7f299819ad0a235064a1b643ab6407 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Tue, 13 Apr 2010 15:06:46 +1000
Subject: xfs: add log item recovery tracing

Currently there is no tracing in log recovery, so it is difficult to
determine what is going on when something goes wrong.

Add tracing for log item recovery to provide visibility into the log
recovery process. The tracing added shows regions being extracted
from the log transactions and added to the transaction hash forming
recovery items, followed by the reordering, cancelling and finally
recovery of the items.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_trace.c |   3 +
 fs/xfs/linux-2.6/xfs_trace.h | 138 +++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_buf_item.h        |   2 +-
 fs/xfs/xfs_log_recover.c     |  44 +++++++++++---
 fs/xfs/xfs_trans.h           |   9 +++
 5 files changed, 187 insertions(+), 9 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 5a107601e969..2a460581308f 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,6 +50,9 @@
 #include "xfs_aops.h"
 #include "quota/xfs_dquot_item.h"
 #include "quota/xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 65371859c753..33f7d2b7afeb 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -32,6 +32,10 @@ struct xfs_da_node_entry;
 struct xfs_dquot;
 struct xlog_ticket;
 struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
 
 DECLARE_EVENT_CLASS(xfs_attr_list_class,
 	TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -1502,6 +1506,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
 
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+	TP_PROTO(struct log *log, struct xlog_recover *trans,
+		struct xlog_recover_item *item, int pass),
+	TP_ARGS(log, trans, item, pass),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long, item)
+		__field(xlog_tid_t, tid)
+		__field(int, type)
+		__field(int, pass)
+		__field(int, count)
+		__field(int, total)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->item = (unsigned long)item;
+		__entry->tid = trans->r_log_tid;
+		__entry->type = ITEM_TYPE(item);
+		__entry->pass = pass;
+		__entry->count = item->ri_cnt;
+		__entry->total = item->ri_total;
+	),
+	TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+		  "item region count/total %d/%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tid,
+		  __entry->pass,
+		  (void *)__entry->item,
+		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+		  __entry->count,
+		  __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+	TP_PROTO(struct log *log, struct xlog_recover *trans, \
+		struct xlog_recover_item *item, int pass), \
+	TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+	TP_ARGS(log, buf_f),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(__int64_t, blkno)
+		__field(unsigned short, len)
+		__field(unsigned short, flags)
+		__field(unsigned short, size)
+		__field(unsigned int, map_size)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->blkno = buf_f->blf_blkno;
+		__entry->len = buf_f->blf_len;
+		__entry->flags = buf_f->blf_flags;
+		__entry->size = buf_f->blf_size;
+		__entry->map_size = buf_f->blf_map_size;
+	),
+	TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+			"map_size %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->blkno,
+		  __entry->len,
+		  __entry->flags,
+		  __entry->size,
+		  __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+	TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+	TP_ARGS(log, in_f),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(unsigned short, size)
+		__field(int, fields)
+		__field(unsigned short, asize)
+		__field(unsigned short, dsize)
+		__field(__int64_t, blkno)
+		__field(int, len)
+		__field(int, boffset)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->ino = in_f->ilf_ino;
+		__entry->size = in_f->ilf_size;
+		__entry->fields = in_f->ilf_fields;
+		__entry->asize = in_f->ilf_asize;
+		__entry->dsize = in_f->ilf_dsize;
+		__entry->blkno = in_f->ilf_blkno;
+		__entry->len = in_f->ilf_len;
+		__entry->boffset = in_f->ilf_boffset;
+	),
+	TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+			"dsize %d, blkno 0x%llx, len %d, boffset %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->fields,
+		  __entry->asize,
+		  __entry->dsize,
+		  __entry->blkno,
+		  __entry->len,
+		  __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+	TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 217f34af00cb..df4454511f73 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -26,7 +26,7 @@ extern kmem_zone_t	*xfs_buf_item_zone;
  * have been logged.
  * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
  */
-typedef struct xfs_buf_log_format_t {
+typedef struct xfs_buf_log_format {
 	unsigned short	blf_type;	/* buf log item type indicator */
 	unsigned short	blf_size;	/* size of this item */
 	ushort		blf_flags;	/* misc state */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 22e6efdc17ea..f21eb8ad2d97 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1408,6 +1408,7 @@ xlog_recover_add_item(
 
 STATIC int
 xlog_recover_add_to_cont_trans(
+	struct log		*log,
 	xlog_recover_t		*trans,
 	xfs_caddr_t		dp,
 	int			len)
@@ -1434,6 +1435,7 @@ xlog_recover_add_to_cont_trans(
 	memcpy(&ptr[old_len], dp, len); /* d, s, l */
 	item->ri_buf[item->ri_cnt-1].i_len += len;
 	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+	trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
 	return 0;
 }
 
@@ -1452,6 +1454,7 @@ xlog_recover_add_to_cont_trans(
  */
 STATIC int
 xlog_recover_add_to_trans(
+	struct log		*log,
 	xlog_recover_t		*trans,
 	xfs_caddr_t		dp,
 	int			len)
@@ -1510,6 +1513,7 @@ xlog_recover_add_to_trans(
 	item->ri_buf[item->ri_cnt].i_addr = ptr;
 	item->ri_buf[item->ri_cnt].i_len  = len;
 	item->ri_cnt++;
+	trace_xfs_log_recover_item_add(log, trans, item, 0);
 	return 0;
 }
 
@@ -1521,7 +1525,9 @@ xlog_recover_add_to_trans(
  */
 STATIC int
 xlog_recover_reorder_trans(
-	xlog_recover_t		*trans)
+	struct log		*log,
+	xlog_recover_t		*trans,
+	int			pass)
 {
 	xlog_recover_item_t	*item, *n;
 	LIST_HEAD(sort_list);
@@ -1535,6 +1541,8 @@ xlog_recover_reorder_trans(
 		switch (ITEM_TYPE(item)) {
 		case XFS_LI_BUF:
 			if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
+				trace_xfs_log_recover_item_reorder_head(log,
+							trans, item, pass);
 				list_move(&item->ri_list, &trans->r_itemq);
 				break;
 			}
@@ -1543,6 +1551,8 @@ xlog_recover_reorder_trans(
 		case XFS_LI_QUOTAOFF:
 		case XFS_LI_EFD:
 		case XFS_LI_EFI:
+			trace_xfs_log_recover_item_reorder_tail(log,
+							trans, item, pass);
 			list_move_tail(&item->ri_list, &trans->r_itemq);
 			break;
 		default:
@@ -1592,8 +1602,10 @@ xlog_recover_do_buffer_pass1(
 	/*
 	 * If this isn't a cancel buffer item, then just return.
 	 */
-	if (!(flags & XFS_BLI_CANCEL))
+	if (!(flags & XFS_BLI_CANCEL)) {
+		trace_xfs_log_recover_buf_not_cancel(log, buf_f);
 		return;
+	}
 
 	/*
 	 * Insert an xfs_buf_cancel record into the hash table of
@@ -1627,6 +1639,7 @@ xlog_recover_do_buffer_pass1(
 	while (nextp != NULL) {
 		if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
 			nextp->bc_refcount++;
+			trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
 			return;
 		}
 		prevp = nextp;
@@ -1640,6 +1653,7 @@ xlog_recover_do_buffer_pass1(
 	bcp->bc_refcount = 1;
 	bcp->bc_next = NULL;
 	prevp->bc_next = bcp;
+	trace_xfs_log_recover_buf_cancel_add(log, buf_f);
 }
 
 /*
@@ -1779,6 +1793,8 @@ xlog_recover_do_inode_buffer(
 	unsigned int		*data_map = NULL;
 	unsigned int		map_size = 0;
 
+	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:
 		data_map = buf_f->blf_data_map;
@@ -1874,6 +1890,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+	struct xfs_mount	*mp,
 	xlog_recover_item_t	*item,
 	xfs_buf_t		*bp,
 	xfs_buf_log_format_t	*buf_f)
@@ -1885,6 +1902,8 @@ xlog_recover_do_reg_buffer(
 	unsigned int		map_size = 0;
 	int                     error;
 
+	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:
 		data_map = buf_f->blf_data_map;
@@ -2083,6 +2102,8 @@ xlog_recover_do_dquot_buffer(
 {
 	uint			type;
 
+	trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
 	/*
 	 * Filesystems are required to send in quota flags at mount time.
 	 */
@@ -2103,7 +2124,7 @@ xlog_recover_do_dquot_buffer(
 	if (log->l_quotaoffs_flag & type)
 		return;
 
-	xlog_recover_do_reg_buffer(item, bp, buf_f);
+	xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*
@@ -2164,9 +2185,11 @@ xlog_recover_do_buffer_trans(
 		 */
 		cancel = xlog_recover_do_buffer_pass2(log, buf_f);
 		if (cancel) {
+			trace_xfs_log_recover_buf_cancel(log, buf_f);
 			return 0;
 		}
 	}
+	trace_xfs_log_recover_buf_recover(log, buf_f);
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:
 		blkno = buf_f->blf_blkno;
@@ -2204,7 +2227,7 @@ xlog_recover_do_buffer_trans(
 		  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
-		xlog_recover_do_reg_buffer(item, bp, buf_f);
+		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 	}
 	if (error)
 		return XFS_ERROR(error);
@@ -2284,8 +2307,10 @@ xlog_recover_do_inode_trans(
 	if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
 					in_f->ilf_len, 0)) {
 		error = 0;
+		trace_xfs_log_recover_inode_cancel(log, in_f);
 		goto error;
 	}
+	trace_xfs_log_recover_inode_recover(log, in_f);
 
 	bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
 			  XBF_LOCK);
@@ -2337,6 +2362,7 @@ xlog_recover_do_inode_trans(
 			/* do nothing */
 		} else {
 			xfs_buf_relse(bp);
+			trace_xfs_log_recover_inode_skip(log, in_f);
 			error = 0;
 			goto error;
 		}
@@ -2758,11 +2784,12 @@ xlog_recover_do_trans(
 	int			error = 0;
 	xlog_recover_item_t	*item;
 
-	error = xlog_recover_reorder_trans(trans);
+	error = xlog_recover_reorder_trans(log, trans, pass);
 	if (error)
 		return error;
 
 	list_for_each_entry(item, &trans->r_itemq, ri_list) {
+		trace_xfs_log_recover_item_recover(log, trans, item, pass);
 		switch (ITEM_TYPE(item)) {
 		case XFS_LI_BUF:
 			error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2919,8 +2946,9 @@ xlog_recover_process_data(
 				error = xlog_recover_unmount_trans(trans);
 				break;
 			case XLOG_WAS_CONT_TRANS:
-				error = xlog_recover_add_to_cont_trans(trans,
-						dp, be32_to_cpu(ohead->oh_len));
+				error = xlog_recover_add_to_cont_trans(log,
+						trans, dp,
+						be32_to_cpu(ohead->oh_len));
 				break;
 			case XLOG_START_TRANS:
 				xlog_warn(
@@ -2930,7 +2958,7 @@ xlog_recover_process_data(
 				break;
 			case 0:
 			case XLOG_CONTINUE_TRANS:
-				error = xlog_recover_add_to_trans(trans,
+				error = xlog_recover_add_to_trans(log, trans,
 						dp, be32_to_cpu(ohead->oh_len));
 				break;
 			default:
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 82574ef36580..c62beee0921e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -49,6 +49,15 @@ typedef struct xfs_trans_header {
 #define	XFS_LI_DQUOT		0x123d
 #define	XFS_LI_QUOTAOFF		0x123e
 
+#define XFS_LI_TYPE_DESC \
+	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \
+	{ XFS_LI_EFD,		"XFS_LI_EFD" }, \
+	{ XFS_LI_IUNLINK,	"XFS_LI_IUNLINK" }, \
+	{ XFS_LI_INODE,		"XFS_LI_INODE" }, \
+	{ XFS_LI_BUF,		"XFS_LI_BUF" }, \
+	{ XFS_LI_DQUOT,		"XFS_LI_DQUOT" }, \
+	{ XFS_LI_QUOTAOFF,	"XFS_LI_QUOTAOFF" }
+
 /*
  * Transaction types.  Used to distinguish types of buffers.
  */
-- 
cgit v1.2.3


From 368e136174344c417bad6ff0380b7b3f574bf120 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Tue, 13 Apr 2010 15:06:50 +1000
Subject: xfs: remove duplicate code from dquot reclaim

The dquot shaker and the free-list reclaim code use exactly the same
algorithm but the code is duplicated and slightly different in each
case. Make the shaker code use the single dquot reclaim code to
remove the code duplication.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_trace.h |   2 -
 fs/xfs/quota/xfs_qm.c        | 264 ++++++++++++-------------------------------
 2 files changed, 73 insertions(+), 193 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 33f7d2b7afeb..ce6a968a8f40 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -653,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
 	TP_PROTO(struct xfs_dquot *dqp), \
 	TP_ARGS(dqp))
 DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 855827320ff6..5ca65c834bbd 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1926,53 +1926,46 @@ xfs_qm_init_quotainos(
 }
 
 
+
 /*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- * XXXsup merge this with qm_reclaim_one().
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
  */
-STATIC int
-xfs_qm_shake_freelist(
-	int howmany)
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
 {
-	int		nreclaimed;
-	xfs_dqhash_t	*hash;
-	xfs_dquot_t	*dqp, *nextdqp;
+	xfs_dquot_t	*dqpout;
+	xfs_dquot_t	*dqp;
 	int		restarts;
-	int		nflushes;
 
-	if (howmany <= 0)
-		return 0;
-
-	nreclaimed = 0;
 	restarts = 0;
-	nflushes = 0;
+	dqpout = NULL;
 
-#ifdef QUOTADEBUG
-	cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
-#endif
-	/* lock order is : hashchainlock, freelistlock, mplistlock */
- tryagain:
+	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+startagain:
 	xfs_qm_freelist_lock(xfs_Gqm);
 
-	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
-	     ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
-	      nreclaimed < howmany); ) {
+	FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
 		struct xfs_mount *mp = dqp->q_mount;
 		xfs_dqlock(dqp);
 
 		/*
 		 * We are racing with dqlookup here. Naturally we don't
-		 * want to reclaim a dquot that lookup wants.
+		 * want to reclaim a dquot that lookup wants. We release the
+		 * freelist lock and start over, so that lookup will grab
+		 * both the dquot and the freelistlock.
 		 */
 		if (dqp->dq_flags & XFS_DQ_WANT) {
+			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+
+			trace_xfs_dqreclaim_want(dqp);
+
 			xfs_dqunlock(dqp);
 			xfs_qm_freelist_unlock(xfs_Gqm);
 			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-				return nreclaimed;
+				return NULL;
 			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-			goto tryagain;
+			goto startagain;
 		}
 
 		/*
@@ -1985,19 +1978,22 @@ xfs_qm_shake_freelist(
 			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
 			ASSERT(dqp->HL_PREVP == NULL);
 			ASSERT(list_empty(&dqp->q_mplist));
+			XQM_FREELIST_REMOVE(dqp);
+			xfs_dqunlock(dqp);
+			dqpout = dqp;
 			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-			nextdqp = dqp->dq_flnext;
-			goto off_freelist;
+			break;
 		}
 
+		ASSERT(dqp->q_hash);
 		ASSERT(!list_empty(&dqp->q_mplist));
+
 		/*
 		 * Try to grab the flush lock. If this dquot is in the process of
 		 * getting flushed to disk, we don't want to reclaim it.
 		 */
 		if (!xfs_dqflock_nowait(dqp)) {
 			xfs_dqunlock(dqp);
-			dqp = dqp->dq_flnext;
 			continue;
 		}
 
@@ -2010,21 +2006,21 @@ xfs_qm_shake_freelist(
 		if (XFS_DQ_IS_DIRTY(dqp)) {
 			int	error;
 
-			trace_xfs_dqshake_dirty(dqp);
+			trace_xfs_dqreclaim_dirty(dqp);
 
 			/*
 			 * We flush it delayed write, so don't bother
-			 * releasing the mplock.
+			 * releasing the freelist lock.
 			 */
 			error = xfs_qm_dqflush(dqp, 0);
 			if (error) {
 				xfs_fs_cmn_err(CE_WARN, mp,
-			"xfs_qm_dqflush_all: dquot %p flush failed", dqp);
+			"xfs_qm_dqreclaim: dquot %p flush failed", dqp);
 			}
 			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
-			dqp = dqp->dq_flnext;
 			continue;
 		}
+
 		/*
 		 * We're trying to get the hashlock out of order. This races
 		 * with dqlookup; so, we giveup and goto the next dquot if
@@ -2033,57 +2029,71 @@ xfs_qm_shake_freelist(
 		 * waiting for the freelist lock.
 		 */
 		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-			xfs_dqfunlock(dqp);
-			xfs_dqunlock(dqp);
-			dqp = dqp->dq_flnext;
-			continue;
+			restarts++;
+			goto dqfunlock;
 		}
+
 		/*
 		 * This races with dquot allocation code as well as dqflush_all
 		 * and reclaim code. So, if we failed to grab the mplist lock,
 		 * giveup everything and start over.
 		 */
-		hash = dqp->q_hash;
-		ASSERT(hash);
 		if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-			/* XXX put a sentinel so that we can come back here */
+			restarts++;
+			mutex_unlock(&dqp->q_hash->qh_lock);
 			xfs_dqfunlock(dqp);
 			xfs_dqunlock(dqp);
-			mutex_unlock(&hash->qh_lock);
 			xfs_qm_freelist_unlock(xfs_Gqm);
-			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-				return nreclaimed;
-			goto tryagain;
+			if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
+				return NULL;
+			goto startagain;
 		}
 
-		trace_xfs_dqshake_unlink(dqp);
-
-#ifdef QUOTADEBUG
-		cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
-			dqp, be32_to_cpu(dqp->q_core.d_id));
-#endif
 		ASSERT(dqp->q_nrefs == 0);
-		nextdqp = dqp->dq_flnext;
-		XQM_HASHLIST_REMOVE(hash, dqp);
 		list_del_init(&dqp->q_mplist);
 		mp->m_quotainfo->qi_dquots--;
 		mp->m_quotainfo->qi_dqreclaims++;
-		xfs_dqfunlock(dqp);
-		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-		mutex_unlock(&hash->qh_lock);
-
- off_freelist:
+		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
 		XQM_FREELIST_REMOVE(dqp);
+		dqpout = dqp;
+		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+		mutex_unlock(&dqp->q_hash->qh_lock);
+dqfunlock:
+		xfs_dqfunlock(dqp);
 		xfs_dqunlock(dqp);
-		nreclaimed++;
-		XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
-		xfs_qm_dqdestroy(dqp);
-		dqp = nextdqp;
+		if (dqpout)
+			break;
+		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+			return NULL;
 	}
 	xfs_qm_freelist_unlock(xfs_Gqm);
-	return nreclaimed;
+	return dqpout;
 }
 
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function ...
+ */
+STATIC int
+xfs_qm_shake_freelist(
+	int	howmany)
+{
+	int		nreclaimed = 0;
+	xfs_dquot_t	*dqp;
+
+	if (howmany <= 0)
+		return 0;
+
+	while (nreclaimed < howmany) {
+		dqp = xfs_qm_dqreclaim_one();
+		if (!dqp)
+			return nreclaimed;
+		xfs_qm_dqdestroy(dqp);
+		nreclaimed++;
+	}
+	return nreclaimed;
+}
 
 /*
  * The kmem_shake interface is invoked when memory is running low.
@@ -2115,134 +2125,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
 }
 
 
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-	xfs_dquot_t	*dqpout;
-	xfs_dquot_t	*dqp;
-	int		restarts;
-	int		nflushes;
-
-	restarts = 0;
-	dqpout = NULL;
-	nflushes = 0;
-
-	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
- startagain:
-	xfs_qm_freelist_lock(xfs_Gqm);
-
-	FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
-		struct xfs_mount *mp = dqp->q_mount;
-		xfs_dqlock(dqp);
-
-		/*
-		 * We are racing with dqlookup here. Naturally we don't
-		 * want to reclaim a dquot that lookup wants. We release the
-		 * freelist lock and start over, so that lookup will grab
-		 * both the dquot and the freelistlock.
-		 */
-		if (dqp->dq_flags & XFS_DQ_WANT) {
-			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
-			trace_xfs_dqreclaim_want(dqp);
-
-			xfs_dqunlock(dqp);
-			xfs_qm_freelist_unlock(xfs_Gqm);
-			if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-				return NULL;
-			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-			goto startagain;
-		}
-
-		/*
-		 * If the dquot is inactive, we are assured that it is
-		 * not on the mplist or the hashlist, and that makes our
-		 * life easier.
-		 */
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(mp == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(dqp->HL_PREVP == NULL);
-			ASSERT(list_empty(&dqp->q_mplist));
-			XQM_FREELIST_REMOVE(dqp);
-			xfs_dqunlock(dqp);
-			dqpout = dqp;
-			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-			break;
-		}
-
-		ASSERT(dqp->q_hash);
-		ASSERT(!list_empty(&dqp->q_mplist));
-
-		/*
-		 * Try to grab the flush lock. If this dquot is in the process of
-		 * getting flushed to disk, we don't want to reclaim it.
-		 */
-		if (!xfs_dqflock_nowait(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		/*
-		 * We have the flush lock so we know that this is not in the
-		 * process of being flushed. So, if this is dirty, flush it
-		 * DELWRI so that we don't get a freelist infested with
-		 * dirty dquots.
-		 */
-		if (XFS_DQ_IS_DIRTY(dqp)) {
-			int	error;
-
-			trace_xfs_dqreclaim_dirty(dqp);
-
-			/*
-			 * We flush it delayed write, so don't bother
-			 * releasing the freelist lock.
-			 */
-			error = xfs_qm_dqflush(dqp, 0);
-			if (error) {
-				xfs_fs_cmn_err(CE_WARN, mp,
-			"xfs_qm_dqreclaim: dquot %p flush failed", dqp);
-			}
-			xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
-			continue;
-		}
-
-		if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-			xfs_dqfunlock(dqp);
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		if (!mutex_trylock(&dqp->q_hash->qh_lock))
-			goto mplistunlock;
-
-		trace_xfs_dqreclaim_unlink(dqp);
-
-		ASSERT(dqp->q_nrefs == 0);
-		list_del_init(&dqp->q_mplist);
-		mp->m_quotainfo->qi_dquots--;
-		mp->m_quotainfo->qi_dqreclaims++;
-		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
-		XQM_FREELIST_REMOVE(dqp);
-		dqpout = dqp;
-		mutex_unlock(&dqp->q_hash->qh_lock);
- mplistunlock:
-		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-		xfs_dqfunlock(dqp);
-		xfs_dqunlock(dqp);
-		if (dqpout)
-			break;
-	}
-
-	xfs_qm_freelist_unlock(xfs_Gqm);
-	return dqpout;
-}
-
-
 /*------------------------------------------------------------------*/
 
 /*
-- 
cgit v1.2.3


From e6a81f13aa9aa20ef03174210aed24791865b05e Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Tue, 13 Apr 2010 15:06:51 +1000
Subject: xfs: convert the dquot hash list to use list heads

Convert the dquot hash list on the filesystem to use the list_head
infrastructure rather than the roll-your-own linkage in the quota code.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_trace.h   |  1 -
 fs/xfs/quota/xfs_dquot.c       | 32 ++++++++++----------------------
 fs/xfs/quota/xfs_dquot.h       |  9 ++-------
 fs/xfs/quota/xfs_qm.c          |  9 +++++----
 fs/xfs/quota/xfs_qm_syscalls.c | 34 +++++++++++++---------------------
 fs/xfs/quota/xfs_quota_priv.h  | 33 ---------------------------------
 6 files changed, 30 insertions(+), 88 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ce6a968a8f40..8a319cfd2901 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -667,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
 DEFINE_DQUOT_EVENT(xfs_dqget_hit);
 DEFINE_DQUOT_EVENT(xfs_dqget_miss);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 838289b92bb9..ad64ab62d9c5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -122,8 +122,7 @@ xfs_qm_dqinit(
 		 dqp->q_nrefs = 0;
 		 dqp->q_blkno = 0;
 		 INIT_LIST_HEAD(&dqp->q_mplist);
-		 dqp->HL_NEXT = NULL;
-		 dqp->HL_PREVP = NULL;
+		 INIT_LIST_HEAD(&dqp->q_hashlist);
 		 dqp->q_bufoffset = 0;
 		 dqp->q_fileoffset = 0;
 		 dqp->q_transp = NULL;
@@ -752,7 +751,6 @@ xfs_qm_dqlookup(
 {
 	xfs_dquot_t		*dqp;
 	uint			flist_locked;
-	xfs_dquot_t		*d;
 
 	ASSERT(mutex_is_locked(&qh->qh_lock));
 
@@ -761,7 +759,7 @@ xfs_qm_dqlookup(
 	/*
 	 * Traverse the hashchain looking for a match
 	 */
-	for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
+	list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
 		/*
 		 * We already have the hashlock. We don't need the
 		 * dqlock to look at the id field of the dquot, since the
@@ -828,21 +826,10 @@ xfs_qm_dqlookup(
 			 * move the dquot to the front of the hashchain
 			 */
 			ASSERT(mutex_is_locked(&qh->qh_lock));
-			if (dqp->HL_PREVP != &qh->qh_next) {
-				trace_xfs_dqlookup_move(dqp);
-				if ((d = dqp->HL_NEXT))
-					d->HL_PREVP = dqp->HL_PREVP;
-				*(dqp->HL_PREVP) = d;
-				d = qh->qh_next;
-				d->HL_PREVP = &dqp->HL_NEXT;
-				dqp->HL_NEXT = d;
-				dqp->HL_PREVP = &qh->qh_next;
-				qh->qh_next = dqp;
-			}
+			list_move(&dqp->q_hashlist, &qh->qh_list);
 			trace_xfs_dqlookup_done(dqp);
 			*O_dqpp = dqp;
-			ASSERT(mutex_is_locked(&qh->qh_lock));
-			return (0);
+			return 0;
 		}
 	}
 
@@ -1034,7 +1021,8 @@ xfs_qm_dqget(
 	 */
 	ASSERT(mutex_is_locked(&h->qh_lock));
 	dqp->q_hash = h;
-	XQM_HASHLIST_INSERT(h, dqp);
+	list_add(&dqp->q_hashlist, &h->qh_list);
+	h->qh_version++;
 
 	/*
 	 * Attach this dquot to this filesystem's list of all dquots,
@@ -1387,7 +1375,7 @@ int
 xfs_qm_dqpurge(
 	xfs_dquot_t	*dqp)
 {
-	xfs_dqhash_t	*thishash;
+	xfs_dqhash_t	*qh = dqp->q_hash;
 	xfs_mount_t	*mp = dqp->q_mount;
 
 	ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
@@ -1453,8 +1441,8 @@ xfs_qm_dqpurge(
 	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
 	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
 
-	thishash = dqp->q_hash;
-	XQM_HASHLIST_REMOVE(thishash, dqp);
+	list_del_init(&dqp->q_hashlist);
+	qh->qh_version++;
 	list_del_init(&dqp->q_mplist);
 	mp->m_quotainfo->qi_dqreclaims++;
 	mp->m_quotainfo->qi_dquots--;
@@ -1470,7 +1458,7 @@ xfs_qm_dqpurge(
 	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
-	mutex_unlock(&thishash->qh_lock);
+	mutex_unlock(&qh->qh_lock);
 	return (0);
 }
 
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 6992a67c165a..169b3c24af79 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -33,17 +33,12 @@
  * The hash chain headers (hash buckets)
  */
 typedef struct xfs_dqhash {
-	struct xfs_dquot *qh_next;
+	struct list_head  qh_list;
 	struct mutex	  qh_lock;
 	uint		  qh_version;	/* ever increasing version */
 	uint		  qh_nelems;	/* number of dquots on the list */
 } xfs_dqhash_t;
 
-typedef struct xfs_dqlink {
-	struct xfs_dquot  *ql_next;	/* forward link */
-	struct xfs_dquot **ql_prevp;	/* pointer to prev ql_next */
-} xfs_dqlink_t;
-
 struct xfs_mount;
 struct xfs_trans;
 
@@ -57,7 +52,6 @@ struct xfs_trans;
 typedef struct xfs_dqmarker {
 	struct xfs_dquot*dqm_flnext;	/* link to freelist: must be first */
 	struct xfs_dquot*dqm_flprev;
-	xfs_dqlink_t	 dqm_hashlist;	/* link to the hash chain */
 	uint		 dqm_flags;	/* various flags (XFS_DQ_*) */
 } xfs_dqmarker_t;
 
@@ -67,6 +61,7 @@ typedef struct xfs_dqmarker {
 typedef struct xfs_dquot {
 	xfs_dqmarker_t	 q_lists;	/* list ptrs, q_flags (marker) */
 	struct list_head q_mplist;	/* mount's list of dquots */
+	struct list_head q_hashlist;	/* link to the hash chain */
 	xfs_dqhash_t	*q_hash;	/* the hashchain header */
 	struct xfs_mount*q_mount;	/* filesystem this relates to */
 	struct xfs_trans*q_transp;	/* trans this belongs to currently */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5ca65c834bbd..08e97f1ef653 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -277,7 +277,7 @@ xfs_qm_rele_quotafs_ref(
 		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
 			ASSERT(dqp->q_mount == NULL);
 			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(dqp->HL_PREVP == NULL);
+			ASSERT(list_empty(&dqp->q_hashlist));
 			ASSERT(list_empty(&dqp->q_mplist));
 			XQM_FREELIST_REMOVE(dqp);
 			xfs_dqunlock(dqp);
@@ -1176,7 +1176,7 @@ xfs_qm_list_init(
 	int		n)
 {
 	mutex_init(&list->qh_lock);
-	list->qh_next = NULL;
+	INIT_LIST_HEAD(&list->qh_list);
 	list->qh_version = 0;
 	list->qh_nelems = 0;
 }
@@ -1976,7 +1976,7 @@ startagain:
 		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
 			ASSERT(mp == NULL);
 			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(dqp->HL_PREVP == NULL);
+			ASSERT(list_empty(&dqp->q_hashlist));
 			ASSERT(list_empty(&dqp->q_mplist));
 			XQM_FREELIST_REMOVE(dqp);
 			xfs_dqunlock(dqp);
@@ -2053,7 +2053,8 @@ startagain:
 		list_del_init(&dqp->q_mplist);
 		mp->m_quotainfo->qi_dquots--;
 		mp->m_quotainfo->qi_dqreclaims++;
-		XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
+		list_del_init(&dqp->q_hashlist);
+		dqp->q_hash->qh_version++;
 		XQM_FREELIST_REMOVE(dqp);
 		dqpout = dqp;
 		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c54fa7790bd8..c82e319f9df4 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -932,6 +932,7 @@ struct mutex  qcheck_lock;
 
 typedef struct dqtest {
 	xfs_dqmarker_t	q_lists;
+	struct list_head q_hashlist;
 	xfs_dqhash_t	*q_hash;	/* the hashchain header */
 	xfs_mount_t	*q_mount;	/* filesystem this relates to */
 	xfs_dqid_t	d_id;		/* user id or group id */
@@ -942,14 +943,9 @@ typedef struct dqtest {
 STATIC void
 xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
 {
-	xfs_dquot_t *d;
-	if (((d) = (h)->qh_next))
-		(d)->HL_PREVP = &((dqp)->HL_NEXT);
-	(dqp)->HL_NEXT = d;
-	(dqp)->HL_PREVP = &((h)->qh_next);
-	(h)->qh_next = (xfs_dquot_t *)dqp;
-	(h)->qh_version++;
-	(h)->qh_nelems++;
+	list_add(&dqp->q_hashlist, &h->qh_list);
+	h->qh_version++;
+	h->qh_nelems++;
 }
 STATIC void
 xfs_qm_dqtest_print(
@@ -1061,9 +1057,7 @@ xfs_qm_internalqcheck_dqget(
 	xfs_dqhash_t	*h;
 
 	h = DQTEST_HASH(mp, id, type);
-	for (d = (xfs_dqtest_t *) h->qh_next; d != NULL;
-	     d = (xfs_dqtest_t *) d->HL_NEXT) {
-		/* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
+	list_for_each_entry(d, &h->qh_list, q_hashlist) {
 		if (d->d_id == id && mp == d->q_mount) {
 			*O_dq = d;
 			return (0);
@@ -1074,6 +1068,7 @@ xfs_qm_internalqcheck_dqget(
 	d->d_id = id;
 	d->q_mount = mp;
 	d->q_hash = h;
+	INIT_LIST_HEAD(&d->q_hashlist);
 	xfs_qm_hashinsert(h, d);
 	*O_dq = d;
 	return (0);
@@ -1180,8 +1175,6 @@ xfs_qm_internalqcheck(
 	xfs_ino_t	lastino;
 	int		done, count;
 	int		i;
-	xfs_dqtest_t	*d, *e;
-	xfs_dqhash_t	*h1;
 	int		error;
 
 	lastino = 0;
@@ -1221,19 +1214,18 @@ xfs_qm_internalqcheck(
 	}
 	cmn_err(CE_DEBUG, "Checking results against system dquots");
 	for (i = 0; i < qmtest_hashmask; i++) {
-		h1 = &qmtest_udqtab[i];
-		for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
+		xfs_dqtest_t	*d, *n;
+		xfs_dqhash_t	*h;
+
+		h = &qmtest_udqtab[i];
+		list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
 			xfs_dqtest_cmp(d);
-			e = (xfs_dqtest_t *) d->HL_NEXT;
 			kmem_free(d);
-			d = e;
 		}
-		h1 = &qmtest_gdqtab[i];
-		for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
+		h = &qmtest_gdqtab[i];
+		list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
 			xfs_dqtest_cmp(d);
-			e = (xfs_dqtest_t *) d->HL_NEXT;
 			kmem_free(d);
-			d = e;
 		}
 	}
 
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 6f4bbae51aca..3a1b9aa763fc 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -72,46 +72,13 @@
 	!dqp->q_core.d_rtbcount && \
 	!dqp->q_core.d_icount)
 
-#define HL_PREVP	dq_hashlist.ql_prevp
-#define HL_NEXT		dq_hashlist.ql_next
-
-
-#define _LIST_REMOVE(h, dqp, PVP, NXT)				\
-	{							\
-		 xfs_dquot_t *d;				\
-		 if (((d) = (dqp)->NXT))				\
-			 (d)->PVP = (dqp)->PVP;			\
-		 *((dqp)->PVP) = d;				\
-		 (dqp)->NXT = NULL;				\
-		 (dqp)->PVP = NULL;				\
-		 (h)->qh_version++;				\
-		 (h)->qh_nelems--;				\
-	}
-
-#define _LIST_INSERT(h, dqp, PVP, NXT)				\
-	{							\
-		 xfs_dquot_t *d;				\
-		 if (((d) = (h)->qh_next))			\
-			 (d)->PVP = &((dqp)->NXT);		\
-		 (dqp)->NXT = d;				\
-		 (dqp)->PVP = &((h)->qh_next);			\
-		 (h)->qh_next = dqp;				\
-		 (h)->qh_version++;				\
-		 (h)->qh_nelems++;				\
-	 }
-
 #define FOREACH_DQUOT_IN_FREELIST(dqp, qlist)	\
 for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
      (dqp) = (dqp)->dq_flnext)
 
-#define XQM_HASHLIST_INSERT(h, dqp)	\
-	 _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
-
 #define XQM_FREELIST_INSERT(h, dqp)	\
 	 xfs_qm_freelist_append(h, dqp)
 
-#define XQM_HASHLIST_REMOVE(h, dqp)	\
-	 _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
 #define XQM_FREELIST_REMOVE(dqp)	\
 	 xfs_qm_freelist_unlink(dqp)
 
-- 
cgit v1.2.3


From df308bcfec27e0c6bc83715dfd417caff5c33f19 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 12 Mar 2010 10:59:16 +0000
Subject: xfs: remove periodic superblock writeback

All modifications to the superblock are done transactionally through
xfs_trans_log_buf, so there is no reason to initiate periodic
asynchronous writeback.  Doing so only removes the superblock from the
delwri list and leads to sub-optimal I/O scheduling.

Cut down xfs_sync_fsdata now that it's only used for synchronous
superblock writes and move the log coverage checks into the two
callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_sync.c | 90 ++++++++++++++-------------------------------
 1 file changed, 27 insertions(+), 63 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a7ba355c21b6..728db015f39c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -356,68 +356,24 @@ xfs_commit_dummy_trans(
 
 STATIC int
 xfs_sync_fsdata(
-	struct xfs_mount	*mp,
-	int			flags)
+	struct xfs_mount	*mp)
 {
 	struct xfs_buf		*bp;
-	struct xfs_buf_log_item	*bip;
-	int			error = 0;
 
 	/*
-	 * If this is xfssyncd() then only sync the superblock if we can
-	 * lock it without sleeping and it is not pinned.
+	 * If the buffer is pinned then push on the log so we won't get stuck
+	 * waiting in the write for someone, maybe ourselves, to flush the log.
+	 *
+	 * Even though we just pushed the log above, we did not have the
+	 * superblock buffer locked at that point so it can become pinned in
+	 * between there and here.
 	 */
-	if (flags & SYNC_TRYLOCK) {
-		ASSERT(!(flags & SYNC_WAIT));
-
-		bp = xfs_getsb(mp, XBF_TRYLOCK);
-		if (!bp)
-			goto out;
-
-		bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
-		if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
-			goto out_brelse;
-	} else {
-		bp = xfs_getsb(mp, 0);
-
-		/*
-		 * If the buffer is pinned then push on the log so we won't
-		 * get stuck waiting in the write for someone, maybe
-		 * ourselves, to flush the log.
-		 *
-		 * Even though we just pushed the log above, we did not have
-		 * the superblock buffer locked at that point so it can
-		 * become pinned in between there and here.
-		 */
-		if (XFS_BUF_ISPINNED(bp))
-			xfs_log_force(mp, 0);
-	}
-
-
-	if (flags & SYNC_WAIT)
-		XFS_BUF_UNASYNC(bp);
-	else
-		XFS_BUF_ASYNC(bp);
-
-	error = xfs_bwrite(mp, bp);
-	if (error)
-		return error;
-
-	/*
-	 * If this is a data integrity sync make sure all pending buffers
-	 * are flushed out for the log coverage check below.
-	 */
-	if (flags & SYNC_WAIT)
-		xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-	if (xfs_log_need_covered(mp))
-		error = xfs_commit_dummy_trans(mp, flags);
-	return error;
+	bp = xfs_getsb(mp, 0);
+	if (XFS_BUF_ISPINNED(bp))
+		xfs_log_force(mp, 0);
 
- out_brelse:
-	xfs_buf_relse(bp);
- out:
-	return error;
+	XFS_BUF_UNASYNC(bp);
+	return xfs_bwrite(mp, bp);
 }
 
 /*
@@ -441,7 +397,7 @@ int
 xfs_quiesce_data(
 	struct xfs_mount	*mp)
 {
-	int error;
+	int			error, error2 = 0;
 
 	/* push non-blocking */
 	xfs_sync_data(mp, 0);
@@ -452,13 +408,20 @@ xfs_quiesce_data(
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
-	error = xfs_sync_fsdata(mp, SYNC_WAIT);
+	error = xfs_sync_fsdata(mp);
+
+	/* make sure all delwri buffers are written out */
+	xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+	/* mark the log as covered if needed */
+	if (xfs_log_need_covered(mp))
+		error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
 
 	/* flush data-only devices */
 	if (mp->m_rtdev_targp)
 		XFS_bflush(mp->m_rtdev_targp);
 
-	return error;
+	return error ? error : error2;
 }
 
 STATIC void
@@ -581,9 +544,9 @@ xfs_flush_inodes(
 }
 
 /*
- * Every sync period we need to unpin all items, reclaim inodes, sync
- * quota and write out the superblock. We might need to cover the log
- * to indicate it is idle.
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle.
  */
 STATIC void
 xfs_sync_worker(
@@ -597,7 +560,8 @@ xfs_sync_worker(
 		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
+		if (xfs_log_need_covered(mp))
+			error = xfs_commit_dummy_trans(mp, 0);
 	}
 	mp->m_sync_seq++;
 	wake_up(&mp->m_wait_single_sync_task);
-- 
cgit v1.2.3


From 8c38366f99f83a7fa441e0c0669fefc18615e005 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 12 Mar 2010 10:59:40 +0000
Subject: xfs: enforce synchronous writes in xfs_bwrite

xfs_bwrite is used with the intention of synchronously writing out
buffers, but currently it does not actually clear the async flag if
that's left from previous writes but instead implements async
behaviour if it finds it.  Remove the code handling asynchronous
writes as we've got rid of those entirely outside of the log and
delwri buffers, and make sure that we clear the async and read flags
before writing the buffer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c  | 17 ++++++-----------
 fs/xfs/linux-2.6/xfs_sync.c |  1 -
 2 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f7ecc44cbbd3..f01de3c55c43 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1007,25 +1007,20 @@ xfs_bwrite(
 	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
-	int			error = 0;
+	int			error;
 
 	bp->b_strat = xfs_bdstrat_cb;
 	bp->b_mount = mp;
 	bp->b_flags |= XBF_WRITE;
-	if (!iowait)
-		bp->b_flags |= _XBF_RUN_QUEUES;
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
 
 	xfs_buf_delwri_dequeue(bp);
 	xfs_buf_iostrategy(bp);
 
-	if (iowait) {
-		error = xfs_buf_iowait(bp);
-		if (error)
-			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-		xfs_buf_relse(bp);
-	}
-
+	error = xfs_buf_iowait(bp);
+	if (error)
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+	xfs_buf_relse(bp);
 	return error;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 728db015f39c..3884e20bc14e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -372,7 +372,6 @@ xfs_sync_fsdata(
 	if (XFS_BUF_ISPINNED(bp))
 		xfs_log_force(mp, 0);
 
-	XFS_BUF_UNASYNC(bp);
 	return xfs_bwrite(mp, bp);
 }
 
-- 
cgit v1.2.3


From fce1cad651e3cf2779ed8f9e6608daf50d29daaf Mon Sep 17 00:00:00 2001
From: Andrea Gelmini
Date: Thu, 25 Mar 2010 17:22:41 +0000
Subject: xfs: xfs_trace.c: duplicated include

fs/xfs/linux-2.6/xfs_trace.c: xfs_attr_sf.h is included more than once.

Signed-off-by: Andrea Gelmini <andrea.gelmini@gelma.net>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_trace.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 2a460581308f..207fa77f63ae 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -41,7 +41,6 @@
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
-#include "xfs_attr_sf.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
-- 
cgit v1.2.3


From 37bc5743fdc29f60fb104cd9031babbabddff25a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 20 Apr 2010 17:00:59 +1000
Subject: xfs: wait for direct I/O to complete in fsync and write_inode

We need to wait for all pending direct I/O requests before taking care of
metadata in fsync and write_inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <david@fromorbit.com>

---
 fs/xfs/linux-2.6/xfs_file.c  | 2 ++
 fs/xfs/linux-2.6/xfs_super.c | 1 +
 2 files changed, 3 insertions(+)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 42dd3bcfba6b..d8fb1b5d6cb5 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -115,6 +115,8 @@ xfs_file_fsync(
 
 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
+	xfs_ioend_wait(ip);
+
 	/*
 	 * We always need to make sure that the required inode state is safe on
 	 * disk.  The inode might be clean but we still might need to force the
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index e8ad6dd2c10c..e9002513e08f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1093,6 +1093,7 @@ xfs_fs_write_inode(
 		 * the code will only flush the inode if it isn't already
 		 * being flushed.
 		 */
+		xfs_ioend_wait(ip);
 		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		if (ip->i_update_core) {
 			error = xfs_log_inode(ip);
-- 
cgit v1.2.3


From 2d1ff3c75a4642062d314634290be6d8da4ffb03 Mon Sep 17 00:00:00 2001
From: Tao Ma
Date: Thu, 29 Apr 2010 15:13:56 +1000
Subject: xfs: Make fiemap work in query mode.

According to Documentation/filesystems/fiemap.txt, if fm_extent_count
is zero, then the fm_extents[] array is ignored (no extents will be
returned), and the fm_mapped_extents count will hold the number of
extents needed.

But since commit 97db39a1f6f69e906e98118392400de5217aa33a changed
bmv_count to match the caller's input buffer, this extent-count query
no longer works.  That commit was written to move bmv_count away from
MAXEXTNUM because of ENOMEM.

This patch just tries to set bm.bmv_count to something sane.
Thanks to Dave Chinner <david@fromorbit.com> for the suggestion.

Cc: Eric Sandeen <sandeen@redhat.com>
Cc: Alex Elder <aelder@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/xfs/linux-2.6/xfs_iops.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e65a7937f3a4..9c8019c78c92 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,7 +673,10 @@ xfs_vn_fiemap(
 		bm.bmv_length = BTOBB(length);
 
 	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = fieinfo->fi_extents_max + 1;
+	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+					fieinfo->fi_extents_max + 1;
+	bm.bmv_count = min_t(__s32, bm.bmv_count,
+			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
 	bm.bmv_iflags = BMV_IF_PREALLOC;
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
 		bm.bmv_iflags |= BMV_IF_ATTRFORK;
-- 
cgit v1.2.3


From 046f1685bb5211c3dea74fda0198c19171e9abc9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:52 +0000
Subject: xfs: remove iomap_target

Instead of using the iomap_target field in struct xfs_iomap
and the IOMAP_REALTIME flag just use the already existing
xfs_find_bdev_for_inode helper.  There's some fallout as we
need to pass the inode in a few more places, which we also
use to sanitize some calling conventions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 39 ++++++++++++++++++---------------------
 fs/xfs/xfs_iomap.c          |  6 ------
 fs/xfs/xfs_iomap.h          |  2 --
 3 files changed, 18 insertions(+), 29 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0f8b9968a803..49dec212da40 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -103,8 +103,9 @@ xfs_count_page_state(
 
 STATIC struct block_device *
 xfs_find_bdev_for_inode(
-	struct xfs_inode	*ip)
+	struct inode		*inode)
 {
+	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 
 	if (XFS_IS_REALTIME_INODE(ip))
@@ -554,19 +555,19 @@ xfs_add_to_ioend(
 
 STATIC void
 xfs_map_buffer(
+	struct inode		*inode,
 	struct buffer_head	*bh,
 	xfs_iomap_t		*mp,
-	xfs_off_t		offset,
-	uint			block_bits)
+	xfs_off_t		offset)
 {
 	sector_t		bn;
 
 	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
 
-	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
-	      ((offset - mp->iomap_offset) >> block_bits);
+	bn = (mp->iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+	      ((offset - mp->iomap_offset) >> inode->i_blkbits);
 
-	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));
+	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
 
 	bh->b_blocknr = bn;
 	set_buffer_mapped(bh);
@@ -574,17 +575,17 @@ xfs_map_buffer(
 
 STATIC void
 xfs_map_at_offset(
+	struct inode		*inode,
 	struct buffer_head	*bh,
-	loff_t			offset,
-	int			block_bits,
-	xfs_iomap_t		*iomapp)
+	xfs_iomap_t		*iomapp,
+	xfs_off_t		offset)
 {
 	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
 	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
 
 	lock_buffer(bh);
-	xfs_map_buffer(bh, iomapp, offset, block_bits);
-	bh->b_bdev = iomapp->iomap_target->bt_bdev;
+	xfs_map_buffer(inode, bh, iomapp, offset);
+	bh->b_bdev = xfs_find_bdev_for_inode(inode);
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
 	clear_buffer_unwritten(bh);
@@ -750,7 +751,6 @@ xfs_convert_page(
 	xfs_off_t		end_offset;
 	unsigned long		p_offset;
 	unsigned int		type;
-	int			bbits = inode->i_blkbits;
 	int			len, page_dirty;
 	int			count = 0, done = 0, uptodate = 1;
  	xfs_off_t		offset = page_offset(page);
@@ -814,7 +814,7 @@ xfs_convert_page(
 			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
 			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
 
-			xfs_map_at_offset(bh, offset, bbits, mp);
+			xfs_map_at_offset(inode, bh, mp, offset);
 			if (startio) {
 				xfs_add_to_ioend(inode, bh, offset,
 						type, ioendp, done);
@@ -1174,8 +1174,7 @@ xfs_page_state_convert(
 				iomap_valid = xfs_iomap_valid(&iomap, offset);
 			}
 			if (iomap_valid) {
-				xfs_map_at_offset(bh, offset,
-						inode->i_blkbits, &iomap);
+				xfs_map_at_offset(inode, bh, &iomap, offset);
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
 							type, &ioend,
@@ -1473,10 +1472,8 @@ __xfs_get_blocks(
 		 * For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
 		 */
-		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
-			xfs_map_buffer(bh_result, &iomap, offset,
-				       inode->i_blkbits);
-		}
+		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN))
+			xfs_map_buffer(inode, bh_result, &iomap, offset);
 		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
 			if (direct)
 				bh_result->b_private = inode;
@@ -1488,7 +1485,7 @@ __xfs_get_blocks(
 	 * If this is a realtime file, data may be on a different device.
 	 * to that pointed to from the buffer_head b_bdev currently.
 	 */
-	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
+	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
 
 	/*
 	 * If we previously allocated a block out beyond eof and we are now
@@ -1612,7 +1609,7 @@ xfs_vm_direct_IO(
 	struct block_device *bdev;
 	ssize_t		ret;
 
-	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
+	bdev = xfs_find_bdev_for_inode(inode);
 
 	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
 					IOMAP_UNWRITTEN : IOMAP_READ);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2d9bce7fcf85..7327d3f3bf64 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -72,12 +72,6 @@ xfs_imap_to_bmap(
 	iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
 	iomapp->iomap_flags = flags;
 
-	if (XFS_IS_REALTIME_INODE(ip)) {
-		iomapp->iomap_flags |= IOMAP_REALTIME;
-		iomapp->iomap_target = mp->m_rtdev_targp;
-	} else {
-		iomapp->iomap_target = mp->m_ddev_targp;
-	}
 	start_block = imap->br_startblock;
 	if (start_block == HOLESTARTBLOCK) {
 		iomapp->iomap_bn = IOMAP_DADDR_NULL;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 174f29990991..a85fa90f6fe7 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -25,7 +25,6 @@ typedef enum {				/* iomap_flags values */
 	IOMAP_READ =		0,	/* mapping for a read */
 	IOMAP_HOLE =		0x02,	/* mapping covers a hole  */
 	IOMAP_DELAY =		0x04,	/* mapping covers delalloc region  */
-	IOMAP_REALTIME =	0x10,	/* mapping on the realtime device  */
 	IOMAP_UNWRITTEN =	0x20,	/* mapping covers allocated */
 					/* but uninitialized file data  */
 	IOMAP_NEW =		0x40	/* just allocate */
@@ -71,7 +70,6 @@ typedef enum {
 
 typedef struct xfs_iomap {
 	xfs_daddr_t		iomap_bn;	/* first 512B blk of mapping */
-	xfs_buftarg_t		*iomap_target;
 	xfs_off_t		iomap_offset;	/* offset of mapping, bytes */
 	xfs_off_t		iomap_bsize;	/* size of mapping, bytes */
 	xfs_off_t		iomap_delta;	/* offset into mapping, bytes */
-- 
cgit v1.2.3


From 9563b3d8998c78d5b7e718b546d5f68037c494fe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:53 +0000
Subject: xfs: remove iomap_delta

The iomap_delta field in struct xfs_iomap just contains the
difference between the offset passed to xfs_iomap and the
iomap_offset.  Just calculate it in the only caller that cares.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 6 ++++--
 fs/xfs/xfs_iomap.c          | 1 -
 fs/xfs/xfs_iomap.h          | 1 -
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 49dec212da40..577d0b448732 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1512,9 +1512,11 @@ __xfs_get_blocks(
 	}
 
 	if (direct || size > (1 << inode->i_blkbits)) {
-		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
+		xfs_off_t iomap_delta = offset - iomap.iomap_offset;
+
+		ASSERT(iomap.iomap_bsize - iomap_delta > 0);
 		offset = min_t(xfs_off_t,
-				iomap.iomap_bsize - iomap.iomap_delta, size);
+				iomap.iomap_bsize - iomap_delta, size);
 		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
 	}
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7327d3f3bf64..c6b409e0f013 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -68,7 +68,6 @@ xfs_imap_to_bmap(
 	xfs_fsblock_t	start_block;
 
 	iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	iomapp->iomap_delta = offset - iomapp->iomap_offset;
 	iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
 	iomapp->iomap_flags = flags;
 
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index a85fa90f6fe7..db9299631ee4 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -72,7 +72,6 @@ typedef struct xfs_iomap {
 	xfs_daddr_t		iomap_bn;	/* first 512B blk of mapping */
 	xfs_off_t		iomap_offset;	/* offset of mapping, bytes */
 	xfs_off_t		iomap_bsize;	/* size of mapping, bytes */
-	xfs_off_t		iomap_delta;	/* offset into mapping, bytes */
 	iomap_flags_t		iomap_flags;
 } xfs_iomap_t;
 
-- 
cgit v1.2.3


From 8699bb0a480193e62d5ccb9c86e2c26b407090a8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:54 +0000
Subject: xfs: report iomap_offset and iomap_bsize in block base

Report the iomap_offset and iomap_bsize fields of struct xfs_iomap
in terms of fsblocks instead of in terms of bytes.  Shift the
byte conversions into the callers temporarily, but they will
disappear or get cleaned up later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 36 +++++++++++++++++++++++++-----------
 fs/xfs/xfs_iomap.c          |  5 ++---
 2 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 577d0b448732..303a779406c0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -320,11 +320,16 @@ xfs_map_blocks(
 
 STATIC int
 xfs_iomap_valid(
+	struct inode		*inode,
 	xfs_iomap_t		*iomapp,
 	loff_t			offset)
 {
-	return offset >= iomapp->iomap_offset &&
-		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
+	xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, iomapp->iomap_offset);
+	xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, iomapp->iomap_bsize);
+
+	return offset >= iomap_offset &&
+		offset < iomap_offset + iomap_bsize;
 }
 
 /*
@@ -561,11 +566,13 @@ xfs_map_buffer(
 	xfs_off_t		offset)
 {
 	sector_t		bn;
+	struct xfs_mount	*m = XFS_I(inode)->i_mount;
+	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, mp->iomap_offset);
 
 	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
 
 	bn = (mp->iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
-	      ((offset - mp->iomap_offset) >> inode->i_blkbits);
+	      ((offset - iomap_offset) >> inode->i_blkbits);
 
 	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
 
@@ -806,7 +813,7 @@ xfs_convert_page(
 			else
 				type = IOMAP_DELAY;
 
-			if (!xfs_iomap_valid(mp, offset)) {
+			if (!xfs_iomap_valid(inode, mp, offset)) {
 				done = 1;
 				continue;
 			}
@@ -1116,7 +1123,7 @@ xfs_page_state_convert(
 		}
 
 		if (iomap_valid)
-			iomap_valid = xfs_iomap_valid(&iomap, offset);
+			iomap_valid = xfs_iomap_valid(inode, &iomap, offset);
 
 		/*
 		 * First case, map an unwritten extent and prepare for
@@ -1171,7 +1178,7 @@ xfs_page_state_convert(
 						&iomap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(&iomap, offset);
+				iomap_valid = xfs_iomap_valid(inode, &iomap, offset);
 			}
 			if (iomap_valid) {
 				xfs_map_at_offset(inode, bh, &iomap, offset);
@@ -1201,7 +1208,7 @@ xfs_page_state_convert(
 						&iomap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(&iomap, offset);
+				iomap_valid = xfs_iomap_valid(inode, &iomap, offset);
 			}
 
 			/*
@@ -1241,7 +1248,11 @@ xfs_page_state_convert(
 		xfs_start_page_writeback(page, 1, count);
 
 	if (ioend && iomap_valid) {
-		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
+		struct xfs_mount	*m = XFS_I(inode)->i_mount;
+		xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, iomap.iomap_offset);
+		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(m, iomap.iomap_bsize);
+
+		offset = (iomap_offset + iomap_bsize - 1) >>
 					PAGE_CACHE_SHIFT;
 		tlast = min_t(pgoff_t, offset, last_index);
 		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
@@ -1512,11 +1523,14 @@ __xfs_get_blocks(
 	}
 
 	if (direct || size > (1 << inode->i_blkbits)) {
-		xfs_off_t iomap_delta = offset - iomap.iomap_offset;
+		struct xfs_mount	*mp = XFS_I(inode)->i_mount;
+		xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, iomap.iomap_offset);
+		xfs_off_t		iomap_delta = offset - iomap_offset;
+		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, iomap.iomap_bsize);
 
-		ASSERT(iomap.iomap_bsize - iomap_delta > 0);
+		ASSERT(iomap_bsize - iomap_delta > 0);
 		offset = min_t(xfs_off_t,
-				iomap.iomap_bsize - iomap_delta, size);
+				iomap_bsize - iomap_delta, size);
 		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
 	}
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c6b409e0f013..49b5ad22a9d8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -64,11 +64,10 @@ xfs_imap_to_bmap(
 	int		imaps,			/* Number of imap entries */
 	int		flags)
 {
-	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fsblock_t	start_block;
 
-	iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
+	iomapp->iomap_offset = imap->br_startoff;
+	iomapp->iomap_bsize = imap->br_blockcount;
 	iomapp->iomap_flags = flags;
 
 	start_block = imap->br_startblock;
-- 
cgit v1.2.3


From e513182d4d7ec8f1870ae368c549ef2838e2c105 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:55 +0000
Subject: xfs: report iomap_bn in block base

Report the iomap_bn field of struct xfs_iomap in terms of filesystem
blocks instead of in terms of 512-byte disk blocks.  Shift the disk
block conversion into the caller, and replace the IOMAP_DELAY and
IOMAP_HOLE flag checks with checks for HOLESTARTBLOCK and
DELAYSTARTBLOCK.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 19 +++++++++++--------
 fs/xfs/xfs_iomap.c          | 19 +++++--------------
 fs/xfs/xfs_iomap.h          |  4 ----
 3 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 303a779406c0..2b09cc34dd07 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -568,10 +568,12 @@ xfs_map_buffer(
 	sector_t		bn;
 	struct xfs_mount	*m = XFS_I(inode)->i_mount;
 	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, mp->iomap_offset);
+	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), mp->iomap_bn);
 
-	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
+	ASSERT(mp->iomap_bn != HOLESTARTBLOCK);
+	ASSERT(mp->iomap_bn != DELAYSTARTBLOCK);
 
-	bn = (mp->iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
 	      ((offset - iomap_offset) >> inode->i_blkbits);
 
 	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
@@ -587,8 +589,8 @@ xfs_map_at_offset(
 	xfs_iomap_t		*iomapp,
 	xfs_off_t		offset)
 {
-	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
-	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
+	ASSERT(iomapp->iomap_bn != HOLESTARTBLOCK);
+	ASSERT(iomapp->iomap_bn != DELAYSTARTBLOCK);
 
 	lock_buffer(bh);
 	xfs_map_buffer(inode, bh, iomapp, offset);
@@ -818,8 +820,8 @@ xfs_convert_page(
 				continue;
 			}
 
-			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
-			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+			ASSERT(mp->iomap_bn != HOLESTARTBLOCK);
+			ASSERT(mp->iomap_bn != DELAYSTARTBLOCK);
 
 			xfs_map_at_offset(inode, bh, mp, offset);
 			if (startio) {
@@ -1478,7 +1480,8 @@ __xfs_get_blocks(
 	if (niomap == 0)
 		return 0;
 
-	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
+	if (iomap.iomap_bn != HOLESTARTBLOCK &&
+	    iomap.iomap_bn != DELAYSTARTBLOCK) {
 		/*
 		 * For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
@@ -1513,7 +1516,7 @@ __xfs_get_blocks(
 	     (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
 		set_buffer_new(bh_result);
 
-	if (iomap.iomap_flags & IOMAP_DELAY) {
+	if (iomap.iomap_bn == DELAYSTARTBLOCK) {
 		BUG_ON(direct);
 		if (create) {
 			set_buffer_uptodate(bh_result);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49b5ad22a9d8..fbe5d32f9ef5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -64,24 +64,15 @@ xfs_imap_to_bmap(
 	int		imaps,			/* Number of imap entries */
 	int		flags)
 {
-	xfs_fsblock_t	start_block;
-
 	iomapp->iomap_offset = imap->br_startoff;
 	iomapp->iomap_bsize = imap->br_blockcount;
 	iomapp->iomap_flags = flags;
+	iomapp->iomap_bn = imap->br_startblock;
 
-	start_block = imap->br_startblock;
-	if (start_block == HOLESTARTBLOCK) {
-		iomapp->iomap_bn = IOMAP_DADDR_NULL;
-		iomapp->iomap_flags |= IOMAP_HOLE;
-	} else if (start_block == DELAYSTARTBLOCK) {
-		iomapp->iomap_bn = IOMAP_DADDR_NULL;
-		iomapp->iomap_flags |= IOMAP_DELAY;
-	} else {
-		iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
-		if (ISUNWRITTEN(imap))
-			iomapp->iomap_flags |= IOMAP_UNWRITTEN;
-	}
+	if (imap->br_startblock != HOLESTARTBLOCK &&
+	    imap->br_startblock != DELAYSTARTBLOCK &&
+	    ISUNWRITTEN(imap))
+		iomapp->iomap_flags |= IOMAP_UNWRITTEN;
 }
 
 int
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index db9299631ee4..d2f3b67d39f9 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,12 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
-#define IOMAP_DADDR_NULL ((xfs_daddr_t) (-1LL))
-
-
 typedef enum {				/* iomap_flags values */
 	IOMAP_READ =		0,	/* mapping for a read */
-	IOMAP_HOLE =		0x02,	/* mapping covers a hole  */
 	IOMAP_DELAY =		0x04,	/* mapping covers delalloc region  */
 	IOMAP_UNWRITTEN =	0x20,	/* mapping covers allocated */
 					/* but uninitialized file data  */
-- 
cgit v1.2.3


From 207d041602cead1c1a16288f6225aea9da1f5bc4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:56 +0000
Subject: xfs: kill struct xfs_iomap

Now that struct xfs_iomap contains exactly the same units as struct
xfs_bmbt_irec we can just use the latter directly in the aops code.
Replace the missing IOMAP_NEW flag with a new boolean output
parameter to xfs_iomap.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 90 +++++++++++++++++++++++----------------------
 fs/xfs/xfs_iomap.c          | 82 ++++++++++++++---------------------------
 fs/xfs/xfs_iomap.h          | 26 +------------
 3 files changed, 75 insertions(+), 123 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 2b09cc34dd07..70ce1da73d01 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -310,23 +310,24 @@ xfs_map_blocks(
 	struct inode		*inode,
 	loff_t			offset,
 	ssize_t			count,
-	xfs_iomap_t		*mapp,
+	struct xfs_bmbt_irec	*imap,
 	int			flags)
 {
 	int			nmaps = 1;
+	int			new = 0;
 
-	return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
+	return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
 }
 
 STATIC int
 xfs_iomap_valid(
 	struct inode		*inode,
-	xfs_iomap_t		*iomapp,
+	struct xfs_bmbt_irec	*imap,
 	loff_t			offset)
 {
 	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-	xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, iomapp->iomap_offset);
-	xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, iomapp->iomap_bsize);
+	xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+	xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
 
 	return offset >= iomap_offset &&
 		offset < iomap_offset + iomap_bsize;
@@ -562,16 +563,16 @@ STATIC void
 xfs_map_buffer(
 	struct inode		*inode,
 	struct buffer_head	*bh,
-	xfs_iomap_t		*mp,
+	struct xfs_bmbt_irec	*imap,
 	xfs_off_t		offset)
 {
 	sector_t		bn;
 	struct xfs_mount	*m = XFS_I(inode)->i_mount;
-	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, mp->iomap_offset);
-	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), mp->iomap_bn);
+	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
 
-	ASSERT(mp->iomap_bn != HOLESTARTBLOCK);
-	ASSERT(mp->iomap_bn != DELAYSTARTBLOCK);
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
 	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
 	      ((offset - iomap_offset) >> inode->i_blkbits);
@@ -586,14 +587,14 @@ STATIC void
 xfs_map_at_offset(
 	struct inode		*inode,
 	struct buffer_head	*bh,
-	xfs_iomap_t		*iomapp,
+	struct xfs_bmbt_irec	*imap,
 	xfs_off_t		offset)
 {
-	ASSERT(iomapp->iomap_bn != HOLESTARTBLOCK);
-	ASSERT(iomapp->iomap_bn != DELAYSTARTBLOCK);
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
 	lock_buffer(bh);
-	xfs_map_buffer(inode, bh, iomapp, offset);
+	xfs_map_buffer(inode, bh, imap, offset);
 	bh->b_bdev = xfs_find_bdev_for_inode(inode);
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
@@ -750,7 +751,7 @@ xfs_convert_page(
 	struct inode		*inode,
 	struct page		*page,
 	loff_t			tindex,
-	xfs_iomap_t		*mp,
+	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
 	int			startio,
@@ -815,15 +816,15 @@ xfs_convert_page(
 			else
 				type = IOMAP_DELAY;
 
-			if (!xfs_iomap_valid(inode, mp, offset)) {
+			if (!xfs_iomap_valid(inode, imap, offset)) {
 				done = 1;
 				continue;
 			}
 
-			ASSERT(mp->iomap_bn != HOLESTARTBLOCK);
-			ASSERT(mp->iomap_bn != DELAYSTARTBLOCK);
+			ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+			ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-			xfs_map_at_offset(inode, bh, mp, offset);
+			xfs_map_at_offset(inode, bh, imap, offset);
 			if (startio) {
 				xfs_add_to_ioend(inode, bh, offset,
 						type, ioendp, done);
@@ -875,7 +876,7 @@ STATIC void
 xfs_cluster_write(
 	struct inode		*inode,
 	pgoff_t			tindex,
-	xfs_iomap_t		*iomapp,
+	struct xfs_bmbt_irec	*imap,
 	xfs_ioend_t		**ioendp,
 	struct writeback_control *wbc,
 	int			startio,
@@ -894,7 +895,7 @@ xfs_cluster_write(
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-					iomapp, ioendp, wbc, startio, all_bh);
+					imap, ioendp, wbc, startio, all_bh);
 			if (done)
 				break;
 		}
@@ -1051,7 +1052,7 @@ xfs_page_state_convert(
 	int		unmapped) /* also implies page uptodate */
 {
 	struct buffer_head	*bh, *head;
-	xfs_iomap_t		iomap;
+	struct xfs_bmbt_irec	imap;
 	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
 	loff_t			offset;
 	unsigned long           p_offset = 0;
@@ -1125,7 +1126,7 @@ xfs_page_state_convert(
 		}
 
 		if (iomap_valid)
-			iomap_valid = xfs_iomap_valid(inode, &iomap, offset);
+			iomap_valid = xfs_iomap_valid(inode, &imap, offset);
 
 		/*
 		 * First case, map an unwritten extent and prepare for
@@ -1177,13 +1178,13 @@ xfs_page_state_convert(
 				}
 
 				err = xfs_map_blocks(inode, offset, size,
-						&iomap, flags);
+						&imap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(inode, &iomap, offset);
+				iomap_valid = xfs_iomap_valid(inode, &imap, offset);
 			}
 			if (iomap_valid) {
-				xfs_map_at_offset(inode, bh, &iomap, offset);
+				xfs_map_at_offset(inode, bh, &imap, offset);
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
 							type, &ioend,
@@ -1207,10 +1208,10 @@ xfs_page_state_convert(
 				size = xfs_probe_cluster(inode, page, bh,
 								head, 1);
 				err = xfs_map_blocks(inode, offset, size,
-						&iomap, flags);
+						&imap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(inode, &iomap, offset);
+				iomap_valid = xfs_iomap_valid(inode, &imap, offset);
 			}
 
 			/*
@@ -1251,13 +1252,13 @@ xfs_page_state_convert(
 
 	if (ioend && iomap_valid) {
 		struct xfs_mount	*m = XFS_I(inode)->i_mount;
-		xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, iomap.iomap_offset);
-		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(m, iomap.iomap_bsize);
+		xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap.br_startoff);
+		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(m, imap.br_blockcount);
 
 		offset = (iomap_offset + iomap_bsize - 1) >>
 					PAGE_CACHE_SHIFT;
 		tlast = min_t(pgoff_t, offset, last_index);
-		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
+		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
 					wbc, startio, all_bh, tlast);
 	}
 
@@ -1460,10 +1461,11 @@ __xfs_get_blocks(
 	int			direct,
 	bmapi_flags_t		flags)
 {
-	xfs_iomap_t		iomap;
+	struct xfs_bmbt_irec	imap;
 	xfs_off_t		offset;
 	ssize_t			size;
-	int			niomap = 1;
+	int			nimap = 1;
+	int			new = 0;
 	int			error;
 
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
@@ -1474,21 +1476,21 @@ __xfs_get_blocks(
 		return 0;
 
 	error = xfs_iomap(XFS_I(inode), offset, size,
-			     create ? flags : BMAPI_READ, &iomap, &niomap);
+			     create ? flags : BMAPI_READ, &imap, &nimap, &new);
 	if (error)
 		return -error;
-	if (niomap == 0)
+	if (nimap == 0)
 		return 0;
 
-	if (iomap.iomap_bn != HOLESTARTBLOCK &&
-	    iomap.iomap_bn != DELAYSTARTBLOCK) {
+	if (imap.br_startblock != HOLESTARTBLOCK &&
+	    imap.br_startblock != DELAYSTARTBLOCK) {
 		/*
 		 * For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
 		 */
-		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN))
-			xfs_map_buffer(inode, bh_result, &iomap, offset);
-		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+		if (create || !ISUNWRITTEN(&imap))
+			xfs_map_buffer(inode, bh_result, &imap, offset);
+		if (create && ISUNWRITTEN(&imap)) {
 			if (direct)
 				bh_result->b_private = inode;
 			set_buffer_unwritten(bh_result);
@@ -1513,10 +1515,10 @@ __xfs_get_blocks(
 	if (create &&
 	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
 	     (offset >= i_size_read(inode)) ||
-	     (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
+	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (iomap.iomap_bn == DELAYSTARTBLOCK) {
+	if (imap.br_startblock == DELAYSTARTBLOCK) {
 		BUG_ON(direct);
 		if (create) {
 			set_buffer_uptodate(bh_result);
@@ -1527,9 +1529,9 @@ __xfs_get_blocks(
 
 	if (direct || size > (1 << inode->i_blkbits)) {
 		struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-		xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, iomap.iomap_offset);
+		xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, imap.br_startoff);
 		xfs_off_t		iomap_delta = offset - iomap_offset;
-		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, iomap.iomap_bsize);
+		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, imap.br_blockcount);
 
 		ASSERT(iomap_bsize - iomap_delta > 0);
 		offset = min_t(xfs_off_t,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fbe5d32f9ef5..7545dcdaa8aa 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -55,46 +55,25 @@
 #define XFS_STRAT_WRITE_IMAPS	2
 #define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
 
-STATIC void
-xfs_imap_to_bmap(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,
-	xfs_bmbt_irec_t *imap,
-	xfs_iomap_t	*iomapp,
-	int		imaps,			/* Number of imap entries */
-	int		flags)
-{
-	iomapp->iomap_offset = imap->br_startoff;
-	iomapp->iomap_bsize = imap->br_blockcount;
-	iomapp->iomap_flags = flags;
-	iomapp->iomap_bn = imap->br_startblock;
-
-	if (imap->br_startblock != HOLESTARTBLOCK &&
-	    imap->br_startblock != DELAYSTARTBLOCK &&
-	    ISUNWRITTEN(imap))
-		iomapp->iomap_flags |= IOMAP_UNWRITTEN;
-}
-
 int
 xfs_iomap(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,
-	ssize_t		count,
-	int		flags,
-	xfs_iomap_t	*iomapp,
-	int		*niomaps)
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	ssize_t			count,
+	int			flags,
+	struct xfs_bmbt_irec	*imap,
+	int			*nimaps,
+	int			*new)
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_fileoff_t	offset_fsb, end_fsb;
-	int		error = 0;
-	int		lockmode = 0;
-	xfs_bmbt_irec_t	imap;
-	int		nimaps = 1;
-	int		bmapi_flags = 0;
-	int		iomap_flags = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			lockmode = 0;
+	int			bmapi_flags = 0;
 
 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-	ASSERT(niomaps && *niomaps == 1);
+
+	*new = 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -136,8 +115,8 @@ xfs_iomap(
 
 	error = xfs_bmapi(NULL, ip, offset_fsb,
 			(xfs_filblks_t)(end_fsb - offset_fsb),
-			bmapi_flags,  NULL, 0, &imap,
-			&nimaps, NULL, NULL);
+			bmapi_flags,  NULL, 0, imap,
+			nimaps, NULL, NULL);
 
 	if (error)
 		goto out;
@@ -145,45 +124,41 @@ xfs_iomap(
 	switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
 	case BMAPI_WRITE:
 		/* If we found an extent, return it */
-		if (nimaps &&
-		    (imap.br_startblock != HOLESTARTBLOCK) &&
-		    (imap.br_startblock != DELAYSTARTBLOCK)) {
-			trace_xfs_iomap_found(ip, offset, count, flags, &imap);
+		if (*nimaps &&
+		    (imap->br_startblock != HOLESTARTBLOCK) &&
+		    (imap->br_startblock != DELAYSTARTBLOCK)) {
+			trace_xfs_iomap_found(ip, offset, count, flags, imap);
 			break;
 		}
 
 		if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
 			error = xfs_iomap_write_direct(ip, offset, count, flags,
-						       &imap, &nimaps, nimaps);
+						       imap, nimaps, *nimaps);
 		} else {
 			error = xfs_iomap_write_delay(ip, offset, count, flags,
-						      &imap, &nimaps);
+						      imap, nimaps);
 		}
 		if (!error) {
-			trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
+			trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
 		}
-		iomap_flags = IOMAP_NEW;
+		*new = 1;
 		break;
 	case BMAPI_ALLOCATE:
 		/* If we found an extent, return it */
 		xfs_iunlock(ip, lockmode);
 		lockmode = 0;
 
-		if (nimaps && !isnullstartblock(imap.br_startblock)) {
-			trace_xfs_iomap_found(ip, offset, count, flags, &imap);
+		if (*nimaps && !isnullstartblock(imap->br_startblock)) {
+			trace_xfs_iomap_found(ip, offset, count, flags, imap);
 			break;
 		}
 
 		error = xfs_iomap_write_allocate(ip, offset, count,
-						 &imap, &nimaps);
+						 imap, nimaps);
 		break;
 	}
 
-	ASSERT(nimaps <= 1);
-
-	if (nimaps)
-		xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, iomap_flags);
-	*niomaps = nimaps;
+	ASSERT(*nimaps <= 1);
 
 out:
 	if (lockmode)
@@ -191,7 +166,6 @@ out:
 	return XFS_ERROR(error);
 }
 
-
 STATIC int
 xfs_iomap_eof_align_last_fsb(
 	xfs_mount_t	*mp,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index d2f3b67d39f9..ba49a4fd1b3f 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -47,35 +47,11 @@ typedef enum {
 	{ BMAPI_MMAP,		"MMAP" }, \
 	{ BMAPI_TRYLOCK,	"TRYLOCK" }
 
-/*
- * xfs_iomap_t:  File system I/O map
- *
- * The iomap_bn field is expressed in 512-byte blocks, and is where the
- * mapping starts on disk.
- *
- * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes.
- * iomap_offset is the offset of the mapping in the file itself.
- * iomap_bsize is the size of the mapping,  iomap_delta is the
- * desired data's offset into the mapping, given the offset supplied
- * to the file I/O map routine.
- *
- * When a request is made to read beyond the logical end of the object,
- * iomap_size may be set to 0, but iomap_offset and iomap_length should be set
- * to the actual amount of underlying storage that has been allocated, if any.
- */
-
-typedef struct xfs_iomap {
-	xfs_daddr_t		iomap_bn;	/* first 512B blk of mapping */
-	xfs_off_t		iomap_offset;	/* offset of mapping, bytes */
-	xfs_off_t		iomap_bsize;	/* size of mapping, bytes */
-	iomap_flags_t		iomap_flags;
-} xfs_iomap_t;
-
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
 extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
-		     struct xfs_iomap *, int *);
+		     struct xfs_bmbt_irec *, int *, int *);
 extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 				  int, struct xfs_bmbt_irec *, int *, int);
 extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
-- 
cgit v1.2.3


From 34a52c6c064fb9f1fd1310407ce076a4bb049734 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:57 +0000
Subject: xfs: move I/O type flags into xfs_aops.c

The IOMAP_ flags are now only used inside xfs_aops.c for extent
probing and I/O completion tracking, so move them here, and rename
them to IO_* as there's no mapping involved at all.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 51 ++++++++++++++++++++++++++-------------------
 fs/xfs/xfs_iomap.h          |  8 -------
 2 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 70ce1da73d01..f1dd70e201cf 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -45,6 +45,15 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+	IO_READ,	/* mapping for a read */
+	IO_DELAY,	/* mapping covers delalloc region */
+	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
+	IO_NEW		/* just allocated */
+};
 
 /*
  * Prime number of hash buckets since address is used as the key.
@@ -184,7 +193,7 @@ xfs_setfilesize(
 	xfs_fsize_t		isize;
 
 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-	ASSERT(ioend->io_type != IOMAP_READ);
+	ASSERT(ioend->io_type != IO_READ);
 
 	if (unlikely(ioend->io_error))
 		return 0;
@@ -215,7 +224,7 @@ xfs_finish_ioend(
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
 		struct workqueue_struct *wq;
 
-		wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+		wq = (ioend->io_type == IO_UNWRITTEN) ?
 			xfsconvertd_workqueue : xfsdatad_workqueue;
 		queue_work(wq, &ioend->io_work);
 		if (wait)
@@ -238,7 +247,7 @@ xfs_end_io(
 	 * For unwritten extents we need to issue transactions to convert a
 	 * range to normal written extens after the data I/O has finished.
 	 */
-	if (ioend->io_type == IOMAP_UNWRITTEN &&
+	if (ioend->io_type == IO_UNWRITTEN &&
 	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
 
 		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
@@ -251,7 +260,7 @@ xfs_end_io(
 	 * We might have to update the on-disk file size after extending
 	 * writes.
 	 */
-	if (ioend->io_type != IOMAP_READ) {
+	if (ioend->io_type != IO_READ) {
 		error = xfs_setfilesize(ioend);
 		ASSERT(!error || error == EAGAIN);
 	}
@@ -724,11 +733,11 @@ xfs_is_delayed_page(
 		bh = head = page_buffers(page);
 		do {
 			if (buffer_unwritten(bh))
-				acceptable = (type == IOMAP_UNWRITTEN);
+				acceptable = (type == IO_UNWRITTEN);
 			else if (buffer_delay(bh))
-				acceptable = (type == IOMAP_DELAY);
+				acceptable = (type == IO_DELAY);
 			else if (buffer_dirty(bh) && buffer_mapped(bh))
-				acceptable = (type == IOMAP_NEW);
+				acceptable = (type == IO_NEW);
 			else
 				break;
 		} while ((bh = bh->b_this_page) != head);
@@ -812,9 +821,9 @@ xfs_convert_page(
 
 		if (buffer_unwritten(bh) || buffer_delay(bh)) {
 			if (buffer_unwritten(bh))
-				type = IOMAP_UNWRITTEN;
+				type = IO_UNWRITTEN;
 			else
-				type = IOMAP_DELAY;
+				type = IO_DELAY;
 
 			if (!xfs_iomap_valid(inode, imap, offset)) {
 				done = 1;
@@ -836,7 +845,7 @@ xfs_convert_page(
 			page_dirty--;
 			count++;
 		} else {
-			type = IOMAP_NEW;
+			type = IO_NEW;
 			if (buffer_mapped(bh) && all_bh && startio) {
 				lock_buffer(bh);
 				xfs_add_to_ioend(inode, bh, offset,
@@ -940,7 +949,7 @@ xfs_aops_discard_page(
 	loff_t			offset = page_offset(page);
 	ssize_t			len = 1 << inode->i_blkbits;
 
-	if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+	if (!xfs_is_delayed_page(page, IO_DELAY))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1107,7 +1116,7 @@ xfs_page_state_convert(
 	bh = head = page_buffers(page);
 	offset = page_offset(page);
 	flags = BMAPI_READ;
-	type = IOMAP_NEW;
+	type = IO_NEW;
 
 	/* TODO: cleanup count and page_dirty */
 
@@ -1150,13 +1159,13 @@ xfs_page_state_convert(
 				iomap_valid = 0;
 
 			if (buffer_unwritten(bh)) {
-				type = IOMAP_UNWRITTEN;
+				type = IO_UNWRITTEN;
 				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
 			} else if (buffer_delay(bh)) {
-				type = IOMAP_DELAY;
+				type = IO_DELAY;
 				flags = BMAPI_ALLOCATE | trylock;
 			} else {
-				type = IOMAP_NEW;
+				type = IO_NEW;
 				flags = BMAPI_WRITE | BMAPI_MMAP;
 			}
 
@@ -1170,7 +1179,7 @@ xfs_page_state_convert(
 				 * for unwritten extent conversion.
 				 */
 				new_ioend = 1;
-				if (type == IOMAP_NEW) {
+				if (type == IO_NEW) {
 					size = xfs_probe_cluster(inode,
 							page, bh, head, 0);
 				} else {
@@ -1215,14 +1224,14 @@ xfs_page_state_convert(
 			}
 
 			/*
-			 * We set the type to IOMAP_NEW in case we are doing a
+			 * We set the type to IO_NEW in case we are doing a
 			 * small write at EOF that is extending the file but
 			 * without needing an allocation. We need to update the
 			 * file size on I/O completion in this case so it is
 			 * the same case as having just allocated a new extent
 			 * that we are writing into for the first time.
 			 */
-			type = IOMAP_NEW;
+			type = IO_NEW;
 			if (trylock_buffer(bh)) {
 				ASSERT(buffer_mapped(bh));
 				if (iomap_valid)
@@ -1594,7 +1603,7 @@ xfs_end_io_direct(
 	 */
 	ioend->io_offset = offset;
 	ioend->io_size = size;
-	if (ioend->io_type == IOMAP_READ) {
+	if (ioend->io_type == IO_READ) {
 		xfs_finish_ioend(ioend, 0);
 	} else if (private && size > 0) {
 		xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
@@ -1605,7 +1614,7 @@ xfs_end_io_direct(
 		 * didn't map an unwritten extent so switch it's completion
 		 * handler.
 		 */
-		ioend->io_type = IOMAP_NEW;
+		ioend->io_type = IO_NEW;
 		xfs_finish_ioend(ioend, 0);
 	}
 
@@ -1633,7 +1642,7 @@ xfs_vm_direct_IO(
 	bdev = xfs_find_bdev_for_inode(inode);
 
 	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
-					IOMAP_UNWRITTEN : IOMAP_READ);
+					IO_UNWRITTEN : IO_READ);
 
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
 					    offset, nr_segs,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index ba49a4fd1b3f..41e32d20a405 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,14 +18,6 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
-typedef enum {				/* iomap_flags values */
-	IOMAP_READ =		0,	/* mapping for a read */
-	IOMAP_DELAY =		0x04,	/* mapping covers delalloc region  */
-	IOMAP_UNWRITTEN =	0x20,	/* mapping covers allocated */
-					/* but uninitialized file data  */
-	IOMAP_NEW =		0x40	/* just allocate */
-} iomap_flags_t;
-
 typedef enum {
 	/* base extent manipulation calls */
 	BMAPI_READ = (1 << 0),		/* read extents */
-- 
cgit v1.2.3


From 558e6891693f4c383c51c7343a88dea174eadacf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:58 +0000
Subject: xfs: clean up xfs_iomap_valid

Rename all iomap_valid identifiers to imap_valid to fit the new
world order, and clean up xfs_iomap_valid to convert the passed in
offset to blocks instead of the imap values to bytes.  Use the
simpler inode->i_blkbits instead of the XFS macros for this.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 46 ++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index f1dd70e201cf..6feecd279470 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -329,17 +329,15 @@ xfs_map_blocks(
 }
 
 STATIC int
-xfs_iomap_valid(
+xfs_imap_valid(
 	struct inode		*inode,
 	struct xfs_bmbt_irec	*imap,
-	loff_t			offset)
+	xfs_off_t		offset)
 {
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-	xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
+	offset >>= inode->i_blkbits;
 
-	return offset >= iomap_offset &&
-		offset < iomap_offset + iomap_bsize;
+	return offset >= imap->br_startoff &&
+		offset < imap->br_startoff + imap->br_blockcount;
 }
 
 /*
@@ -825,7 +823,7 @@ xfs_convert_page(
 			else
 				type = IO_DELAY;
 
-			if (!xfs_iomap_valid(inode, imap, offset)) {
+			if (!xfs_imap_valid(inode, imap, offset)) {
 				done = 1;
 				continue;
 			}
@@ -1069,7 +1067,7 @@ xfs_page_state_convert(
 	__uint64_t              end_offset;
 	pgoff_t                 end_index, last_index, tlast;
 	ssize_t			size, len;
-	int			flags, err, iomap_valid = 0, uptodate = 1;
+	int			flags, err, imap_valid = 0, uptodate = 1;
 	int			page_dirty, count = 0;
 	int			trylock = 0;
 	int			all_bh = unmapped;
@@ -1130,12 +1128,12 @@ xfs_page_state_convert(
 			 * the iomap is actually still valid, but the ioend
 			 * isn't.  shouldn't happen too often.
 			 */
-			iomap_valid = 0;
+			imap_valid = 0;
 			continue;
 		}
 
-		if (iomap_valid)
-			iomap_valid = xfs_iomap_valid(inode, &imap, offset);
+		if (imap_valid)
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
 
 		/*
 		 * First case, map an unwritten extent and prepare for
@@ -1156,7 +1154,7 @@ xfs_page_state_convert(
 			 * Make sure we don't use a read-only iomap
 			 */
 			if (flags == BMAPI_READ)
-				iomap_valid = 0;
+				imap_valid = 0;
 
 			if (buffer_unwritten(bh)) {
 				type = IO_UNWRITTEN;
@@ -1169,7 +1167,7 @@ xfs_page_state_convert(
 				flags = BMAPI_WRITE | BMAPI_MMAP;
 			}
 
-			if (!iomap_valid) {
+			if (!imap_valid) {
 				/*
 				 * if we didn't have a valid mapping then we
 				 * need to ensure that we put the new mapping
@@ -1190,9 +1188,10 @@ xfs_page_state_convert(
 						&imap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(inode, &imap, offset);
+				imap_valid = xfs_imap_valid(inode, &imap,
+							    offset);
 			}
-			if (iomap_valid) {
+			if (imap_valid) {
 				xfs_map_at_offset(inode, bh, &imap, offset);
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
@@ -1212,7 +1211,7 @@ xfs_page_state_convert(
 			 * That means it must already have extents allocated
 			 * underneath it. Map the extent by reading it.
 			 */
-			if (!iomap_valid || flags != BMAPI_READ) {
+			if (!imap_valid || flags != BMAPI_READ) {
 				flags = BMAPI_READ;
 				size = xfs_probe_cluster(inode, page, bh,
 								head, 1);
@@ -1220,7 +1219,8 @@ xfs_page_state_convert(
 						&imap, flags);
 				if (err)
 					goto error;
-				iomap_valid = xfs_iomap_valid(inode, &imap, offset);
+				imap_valid = xfs_imap_valid(inode, &imap,
+							    offset);
 			}
 
 			/*
@@ -1234,18 +1234,18 @@ xfs_page_state_convert(
 			type = IO_NEW;
 			if (trylock_buffer(bh)) {
 				ASSERT(buffer_mapped(bh));
-				if (iomap_valid)
+				if (imap_valid)
 					all_bh = 1;
 				xfs_add_to_ioend(inode, bh, offset, type,
-						&ioend, !iomap_valid);
+						&ioend, !imap_valid);
 				page_dirty--;
 				count++;
 			} else {
-				iomap_valid = 0;
+				imap_valid = 0;
 			}
 		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
 			   (unmapped || startio)) {
-			iomap_valid = 0;
+			imap_valid = 0;
 		}
 
 		if (!iohead)
@@ -1259,7 +1259,7 @@ xfs_page_state_convert(
 	if (startio)
 		xfs_start_page_writeback(page, 1, count);
 
-	if (ioend && iomap_valid) {
+	if (ioend && imap_valid) {
 		struct xfs_mount	*m = XFS_I(inode)->i_mount;
 		xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap.br_startoff);
 		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(m, imap.br_blockcount);
-- 
cgit v1.2.3


From 2b8f12b7e438fa6ba4a0f8f861871be0beb3a3e6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:28:59 +0000
Subject: xfs: clean up mapping size calculation in __xfs_get_blocks

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6feecd279470..1d51bdde5748 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1536,16 +1536,23 @@ __xfs_get_blocks(
 		}
 	}
 
+	/*
+	 * If this is O_DIRECT or the mpage code calling tell them how large
+	 * the mapping is, so that we can avoid repeated get_blocks calls.
+	 */
 	if (direct || size > (1 << inode->i_blkbits)) {
-		struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-		xfs_off_t		iomap_offset = XFS_FSB_TO_B(mp, imap.br_startoff);
-		xfs_off_t		iomap_delta = offset - iomap_offset;
-		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		ASSERT(iomap_bsize - iomap_delta > 0);
-		offset = min_t(xfs_off_t,
-				iomap_bsize - iomap_delta, size);
-		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
+		xfs_off_t		mapping_size;
+
+		mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+		mapping_size <<= inode->i_blkbits;
+
+		ASSERT(mapping_size > 0);
+		if (mapping_size > size)
+			mapping_size = size;
+		if (mapping_size > LONG_MAX)
+			mapping_size = LONG_MAX;
+
+		bh_result->b_size = mapping_size;
 	}
 
 	return 0;
-- 
cgit v1.2.3


From bd1556a146d46070049428dded306829cb65161d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 Apr 2010 12:29:00 +0000
Subject: xfs: clean up end index calculation in xfs_page_state_convert

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 1d51bdde5748..089eaca860b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1065,7 +1065,7 @@ xfs_page_state_convert(
 	unsigned long           p_offset = 0;
 	unsigned int		type;
 	__uint64_t              end_offset;
-	pgoff_t                 end_index, last_index, tlast;
+	pgoff_t                 end_index, last_index;
 	ssize_t			size, len;
 	int			flags, err, imap_valid = 0, uptodate = 1;
 	int			page_dirty, count = 0;
@@ -1260,15 +1260,22 @@ xfs_page_state_convert(
 		xfs_start_page_writeback(page, 1, count);
 
 	if (ioend && imap_valid) {
-		struct xfs_mount	*m = XFS_I(inode)->i_mount;
-		xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap.br_startoff);
-		xfs_off_t		iomap_bsize = XFS_FSB_TO_B(m, imap.br_blockcount);
+		xfs_off_t		end_index;
+
+		end_index = imap.br_startoff + imap.br_blockcount;
+
+		/* to bytes */
+		end_index <<= inode->i_blkbits;
+
+		/* to pages */
+		end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+		/* check against file size */
+		if (end_index > last_index)
+			end_index = last_index;
 
-		offset = (iomap_offset + iomap_bsize - 1) >>
-					PAGE_CACHE_SHIFT;
-		tlast = min_t(pgoff_t, offset, last_index);
 		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-					wbc, startio, all_bh, tlast);
+					wbc, startio, all_bh, end_index);
 	}
 
 	if (iohead)
-- 
cgit v1.2.3


From b9b2dd36c1bc64430f8e13990ab135cbecc10076 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 6 May 2010 17:04:58 -0400
Subject: quota: unify ->get_dqblk

Pass the larger struct fs_disk_quota to the ->get_dqblk operation so
that the Q_GETQUOTA and Q_XGETQUOTA operations can be implemented
with a single filesystem operation and we can retire the ->get_xquota
operation.  The additional information (RT-subvolume accounting and
warn counts) is left zero for the VFS quota implementation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/gfs2/quota.c                 |  6 +++---
 fs/quota/dquot.c                | 27 ++++++++++++++++-----------
 fs/quota/quota.c                | 23 ++++++++++++++++++-----
 fs/xfs/linux-2.6/xfs_quotaops.c |  4 ++--
 include/linux/quota.h           |  3 +--
 include/linux/quotaops.h        |  3 ++-
 6 files changed, 42 insertions(+), 24 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index d5f4661287f9..dec93577a783 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1476,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
 	return 0;
 }
 
-static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id,
-			   struct fs_disk_quota *fdq)
+static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
+			  struct fs_disk_quota *fdq)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_quota_lvb *qlvb;
@@ -1629,7 +1629,7 @@ out_put:
 const struct quotactl_ops gfs2_quotactl_ops = {
 	.quota_sync     = gfs2_quota_sync,
 	.get_xstate     = gfs2_quota_get_xstate,
-	.get_xquota	= gfs2_xquota_get,
+	.get_dqblk	= gfs2_get_dqblk,
 	.set_xquota	= gfs2_xquota_set,
 };
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 01347e81d0ca..6aed77fc99c7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2301,25 +2301,30 @@ static inline qsize_t stoqb(qsize_t space)
 }
 
 /* Generic routine for getting common part of quota structure */
-static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
+static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 
+	memset(di, 0, sizeof(*di));
+	di->d_version = FS_DQUOT_VERSION;
+	di->d_flags = dquot->dq_type == USRQUOTA ?
+			XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+	di->d_id = dquot->dq_id;
+
 	spin_lock(&dq_data_lock);
-	di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
-	di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
-	di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace;
-	di->dqb_ihardlimit = dm->dqb_ihardlimit;
-	di->dqb_isoftlimit = dm->dqb_isoftlimit;
-	di->dqb_curinodes = dm->dqb_curinodes;
-	di->dqb_btime = dm->dqb_btime;
-	di->dqb_itime = dm->dqb_itime;
-	di->dqb_valid = QIF_ALL;
+	di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
+	di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
+	di->d_ino_hardlimit = dm->dqb_ihardlimit;
+	di->d_ino_softlimit = dm->dqb_isoftlimit;
+	di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace;
+	di->d_icount = dm->dqb_curinodes;
+	di->d_btimer = dm->dqb_btime;
+	di->d_itimer = dm->dqb_itime;
 	spin_unlock(&dq_data_lock);
 }
 
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
-		  struct if_dqblk *di)
+		  struct fs_disk_quota *di)
 {
 	struct dquot *dquot;
 
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95388f9b7356..8680e257c2bd 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -136,19 +136,32 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
 	return sb->s_qcop->set_info(sb, type, &info);
 }
 
+static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
+{
+	dst->dqb_bhardlimit = src->d_blk_hardlimit;
+	dst->dqb_bsoftlimit = src->d_blk_softlimit;
+	dst->dqb_curspace = src->d_bcount;
+	dst->dqb_ihardlimit = src->d_ino_hardlimit;
+	dst->dqb_isoftlimit = src->d_ino_softlimit;
+	dst->dqb_curinodes = src->d_icount;
+	dst->dqb_btime = src->d_btimer;
+	dst->dqb_itime = src->d_itimer;
+	dst->dqb_valid = QIF_ALL;
+}
+
 static int quota_getquota(struct super_block *sb, int type, qid_t id,
 			  void __user *addr)
 {
+	struct fs_disk_quota fdq;
 	struct if_dqblk idq;
 	int ret;
 
-	if (!sb_has_quota_active(sb, type))
-		return -ESRCH;
 	if (!sb->s_qcop->get_dqblk)
 		return -ENOSYS;
-	ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+	ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
 	if (ret)
 		return ret;
+	copy_to_if_dqblk(&idq, &fdq);
 	if (copy_to_user(addr, &idq, sizeof(idq)))
 		return -EFAULT;
 	return 0;
@@ -210,9 +223,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
 	struct fs_disk_quota fdq;
 	int ret;
 
-	if (!sb->s_qcop->get_xquota)
+	if (!sb->s_qcop->get_dqblk)
 		return -ENOSYS;
-	ret = sb->s_qcop->get_xquota(sb, type, id, &fdq);
+	ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
 	if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
 		return -EFAULT;
 	return ret;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 1947514ce1ad..3d473f43c9a9 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -97,7 +97,7 @@ xfs_fs_set_xstate(
 }
 
 STATIC int
-xfs_fs_get_xquota(
+xfs_fs_get_dqblk(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -135,6 +135,6 @@ xfs_fs_set_xquota(
 const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
-	.get_xquota		= xfs_fs_get_xquota,
+	.get_dqblk		= xfs_fs_get_dqblk,
 	.set_xquota		= xfs_fs_set_xquota,
 };
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cdfde10481b7..42364219dc9b 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -337,11 +337,10 @@ struct quotactl_ops {
 	int (*quota_sync)(struct super_block *, int, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
-	int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
+	int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
-	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 };
 
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e6fa7acce290..d32a48631b0d 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -63,7 +63,8 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
 int vfs_quota_sync(struct super_block *sb, int type, int wait);
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
+		struct fs_disk_quota *di);
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
-- 
cgit v1.2.3


From c472b43275976512e4c1c32da5ced03f339cb380 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 6 May 2010 17:05:17 -0400
Subject: quota: unify ->set_dqblk

Pass the larger struct fs_disk_quota to the ->set_dqblk operation so
that the Q_SETQUOTA and Q_XSETQUOTA operations can be implemented
with a single filesystem operation and we can retire the ->set_xquota
operation.  The additional information (RT-subvolume accounting and
warn counts) is left zero for the VFS quota implementation.

Add new fieldmask values for setting the number of blocks and inodes
values which is required for the VFS quota, but wasn't for XFS.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/gfs2/quota.c                 |  6 ++--
 fs/quota/dquot.c                | 67 ++++++++++++++++++++++++++---------------
 fs/quota/quota.c                | 36 +++++++++++++++++++---
 fs/xfs/linux-2.6/xfs_quotaops.c |  4 +--
 fs/xfs/quota/xfs_qm_syscalls.c  | 10 ++++--
 include/linux/dqblk_xfs.h       |  9 ++++++
 include/linux/quota.h           |  3 +-
 include/linux/quotaops.h        |  3 +-
 8 files changed, 98 insertions(+), 40 deletions(-)

(limited to 'fs/xfs/linux-2.6')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index dec93577a783..49667d68769e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1521,8 +1521,8 @@ out:
 /* GFS2 only supports a subset of the XFS fields */
 #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
 
-static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id,
-			   struct fs_disk_quota *fdq)
+static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
+			  struct fs_disk_quota *fdq)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
@@ -1630,6 +1630,6 @@ const struct quotactl_ops gfs2_quotactl_ops = {
 	.quota_sync     = gfs2_quota_sync,
 	.get_xstate     = gfs2_quota_get_xstate,
 	.get_dqblk	= gfs2_get_dqblk,
-	.set_xquota	= gfs2_xquota_set,
+	.set_dqblk	= gfs2_set_dqblk,
 };
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 6aed77fc99c7..b1a5036560a9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2338,51 +2338,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 }
 EXPORT_SYMBOL(vfs_get_dqblk);
 
+#define VFS_FS_DQ_MASK \
+	(FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
+	 FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \
+	 FS_DQ_BTIMER | FS_DQ_ITIMER)
+
 /* Generic routine for setting common part of quota structure */
-static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
+static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 	int check_blim = 0, check_ilim = 0;
 	struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
 
-	if ((di->dqb_valid & QIF_BLIMITS &&
-	     (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
-	      di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
-	    (di->dqb_valid & QIF_ILIMITS &&
-	     (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
-	      di->dqb_isoftlimit > dqi->dqi_maxilimit)))
+	if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
+		return -EINVAL;
+
+	if (((di->d_fieldmask & FS_DQ_BSOFT) &&
+	     (di->d_blk_softlimit > dqi->dqi_maxblimit)) ||
+	    ((di->d_fieldmask & FS_DQ_BHARD) &&
+	     (di->d_blk_hardlimit > dqi->dqi_maxblimit)) ||
+	    ((di->d_fieldmask & FS_DQ_ISOFT) &&
+	     (di->d_ino_softlimit > dqi->dqi_maxilimit)) ||
+	    ((di->d_fieldmask & FS_DQ_IHARD) &&
+	     (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
 		return -ERANGE;
 
 	spin_lock(&dq_data_lock);
-	if (di->dqb_valid & QIF_SPACE) {
-		dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
+	if (di->d_fieldmask & FS_DQ_BCOUNT) {
+		dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
 		check_blim = 1;
 		set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_BLIMITS) {
-		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
-		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
+
+	if (di->d_fieldmask & FS_DQ_BSOFT)
+		dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit);
+	if (di->d_fieldmask & FS_DQ_BHARD)
+		dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit);
+	if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) {
 		check_blim = 1;
 		set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_INODES) {
-		dm->dqb_curinodes = di->dqb_curinodes;
+
+	if (di->d_fieldmask & FS_DQ_ICOUNT) {
+		dm->dqb_curinodes = di->d_icount;
 		check_ilim = 1;
 		set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_ILIMITS) {
-		dm->dqb_isoftlimit = di->dqb_isoftlimit;
-		dm->dqb_ihardlimit = di->dqb_ihardlimit;
+
+	if (di->d_fieldmask & FS_DQ_ISOFT)
+		dm->dqb_isoftlimit = di->d_ino_softlimit;
+	if (di->d_fieldmask & FS_DQ_IHARD)
+		dm->dqb_ihardlimit = di->d_ino_hardlimit;
+	if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) {
 		check_ilim = 1;
 		set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_BTIME) {
-		dm->dqb_btime = di->dqb_btime;
+
+	if (di->d_fieldmask & FS_DQ_BTIMER) {
+		dm->dqb_btime = di->d_btimer;
 		check_blim = 1;
 		set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_ITIME) {
-		dm->dqb_itime = di->dqb_itime;
+
+	if (di->d_fieldmask & FS_DQ_ITIMER) {
+		dm->dqb_itime = di->d_itimer;
 		check_ilim = 1;
 		set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
 	}
@@ -2392,7 +2411,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		    dm->dqb_curspace < dm->dqb_bsoftlimit) {
 			dm->dqb_btime = 0;
 			clear_bit(DQ_BLKS_B, &dquot->dq_flags);
-		} else if (!(di->dqb_valid & QIF_BTIME))
+		} else if (!(di->d_fieldmask & FS_DQ_BTIMER))
 			/* Set grace only if user hasn't provided his own... */
 			dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
 	}
@@ -2401,7 +2420,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		    dm->dqb_curinodes < dm->dqb_isoftlimit) {
 			dm->dqb_itime = 0;
 			clear_bit(DQ_INODES_B, &dquot->dq_flags);
-		} else if (!(di->dqb_valid & QIF_ITIME))
+		} else if (!(di->d_fieldmask & FS_DQ_ITIMER))
 			/* Set grace only if user hasn't provided his own... */
 			dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
 	}
@@ -2417,7 +2436,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 }
 
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
-		  struct if_dqblk *di)
+		  struct fs_disk_quota *di)
 {
 	struct dquot *dquot;
 	int rc;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 8680e257c2bd..d6ee49dda4fd 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -167,18 +167,44 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
 	return 0;
 }
 
+static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src)
+{
+	dst->d_blk_hardlimit = src->dqb_bhardlimit;
+	dst->d_blk_softlimit  = src->dqb_bsoftlimit;
+	dst->d_bcount = src->dqb_curspace;
+	dst->d_ino_hardlimit = src->dqb_ihardlimit;
+	dst->d_ino_softlimit = src->dqb_isoftlimit;
+	dst->d_icount = src->dqb_curinodes;
+	dst->d_btimer = src->dqb_btime;
+	dst->d_itimer = src->dqb_itime;
+
+	dst->d_fieldmask = 0;
+	if (src->dqb_valid & QIF_BLIMITS)
+		dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD;
+	if (src->dqb_valid & QIF_SPACE)
+		dst->d_fieldmask |= FS_DQ_BCOUNT;
+	if (src->dqb_valid & QIF_ILIMITS)
+		dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD;
+	if (src->dqb_valid & QIF_INODES)
+		dst->d_fieldmask |= FS_DQ_ICOUNT;
+	if (src->dqb_valid & QIF_BTIME)
+		dst->d_fieldmask |= FS_DQ_BTIMER;
+	if (src->dqb_valid & QIF_ITIME)
+		dst->d_fieldmask |= FS_DQ_ITIMER;
+}
+
 static int quota_setquota(struct super_block *sb, int type, qid_t id,
 			  void __user *addr)
 {
+	struct fs_disk_quota fdq;
 	struct if_dqblk idq;
 
 	if (copy_from_user(&idq, addr, sizeof(idq)))
 		return -EFAULT;
-	if (!sb_has_quota_active(sb, type))
-		return -ESRCH;
 	if (!sb->s_qcop->set_dqblk)
 		return -ENOSYS;
-	return sb->s_qcop->set_dqblk(sb, type, id, &idq);
+	copy_from_if_dqblk(&fdq, &idq);
+	return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
 }
 
 static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
@@ -212,9 +238,9 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
 
 	if (copy_from_user(&fdq, addr, sizeof(fdq)))
 		return -EFAULT;
-	if (!sb->s_qcop->set_xquota)
+	if (!sb->s_qcop->set_dqblk)
 		return -ENOSYS;
-	return sb->s_qcop->set_xquota(sb, type, id, &fdq);
+	return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
 }
 
 static int quota_getxquota(struct super_block *sb, int type, qid_t id,
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d473f43c9a9..e31bf21fe5d3 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -114,7 +114,7 @@ xfs_fs_get_dqblk(
 }
 
 STATIC int
-xfs_fs_set_xquota(
+xfs_fs_set_dqblk(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -136,5 +136,5 @@ const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
 	.get_dqblk		= xfs_fs_get_dqblk,
-	.set_xquota		= xfs_fs_set_xquota,
+	.set_dqblk		= xfs_fs_set_dqblk,
 };
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 26fa43140f2e..92b002f1805f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -448,6 +448,9 @@ xfs_qm_scall_getqstat(
 	return 0;
 }
 
+#define XFS_DQ_MASK \
+	(FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
+
 /*
  * Adjust quota limits, and start/stop timers accordingly.
  */
@@ -465,9 +468,10 @@ xfs_qm_scall_setqlim(
 	int			error;
 	xfs_qcnt_t		hard, soft;
 
-	if ((newlim->d_fieldmask &
-	    (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
-		return (0);
+	if (newlim->d_fieldmask & ~XFS_DQ_MASK)
+		return EINVAL;
+	if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
+		return 0;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
 	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
diff --git a/include/linux/dqblk_xfs.h b/include/linux/dqblk_xfs.h
index 527504c11c5e..4389ae72024e 100644
--- a/include/linux/dqblk_xfs.h
+++ b/include/linux/dqblk_xfs.h
@@ -109,6 +109,15 @@ typedef struct fs_disk_quota {
 #define FS_DQ_RTBWARNS	(1<<11)
 #define FS_DQ_WARNS_MASK	(FS_DQ_BWARNS | FS_DQ_IWARNS | FS_DQ_RTBWARNS)
 
+/*
+ * Accounting values.  These can only be set for filesystem with
+ * non-transactional quotas that require quotacheck(8) in userspace.
+ */
+#define FS_DQ_BCOUNT		(1<<12)
+#define FS_DQ_ICOUNT		(1<<13)
+#define FS_DQ_RTBCOUNT		(1<<14)
+#define FS_DQ_ACCT_MASK		(FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT)
+
 /*
  * Various flags related to quotactl(2).  Only relevant to XFS filesystems.
  */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 42364219dc9b..7126a15467f1 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -338,10 +338,9 @@ struct quotactl_ops {
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
-	int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
+	int (*set_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
-	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 };
 
 struct quota_format_type {
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index d32a48631b0d..82c70c42d035 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -65,7 +65,8 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 		struct fs_disk_quota *di);
-int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
+		struct fs_disk_quota *di);
 
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
-- 
cgit v1.2.3