16 files changed, 325 insertions, 112 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2c3aeab17e20..7b9cd49622b1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
 
-	if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
 		return;
 
 	invalidate_bh_lrus();
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index f829fe963f5b..5104d84c4f64 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
 } while (0)
 
 
-#define CODA_FREE(ptr,size) \
-    do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
 
 /* inode to cnode access functions */
 
diff --git a/fs/dax.c b/fs/dax.c
index 55aa273145a8..4fd6b0c5c6b5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pagevec.h>
 #include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
@@ -245,6 +246,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 	loff_t end = pos + iov_iter_count(iter);
 
 	memset(&bh, 0, sizeof(bh));
+	bh.b_bdev = inode->i_sb->s_bdev;
 
 	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
 		struct address_space *mapping = inode->i_mapping;
@@ -324,6 +326,199 @@ static int copy_user_bh(struct page *to, struct inode *inode,
 	return 0;
 }
 
+#define NO_SECTOR -1
+#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+		sector_t sector, bool pmd_entry, bool dirty)
+{
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+	pgoff_t pmd_index = DAX_PMD_INDEX(index);
+	int type, error = 0;
+	void *entry;
+
+	WARN_ON_ONCE(pmd_entry && !dirty);
+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	entry = radix_tree_lookup(page_tree, pmd_index);
+	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+		index = pmd_index;
+		goto dirty;
+	}
+
+	entry = radix_tree_lookup(page_tree, index);
+	if (entry) {
+		type = RADIX_DAX_TYPE(entry);
+		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+					type != RADIX_DAX_PMD)) {
+			error = -EIO;
+			goto unlock;
+		}
+
+		if (!pmd_entry || type == RADIX_DAX_PMD)
+			goto dirty;
+
+		/*
+		 * We only insert dirty PMD entries into the radix tree.  This
+		 * means we don't need to worry about removing a dirty PTE
+		 * entry and inserting a clean PMD entry, thus reducing the
+		 * range we would flush with a follow-up fsync/msync call.
+		 */
+		radix_tree_delete(&mapping->page_tree, index);
+		mapping->nrexceptional--;
+	}
+
+	if (sector == NO_SECTOR) {
+		/*
+		 * This can happen during correct operation if our pfn_mkwrite
+		 * fault raced against a hole punch operation.  If this
+		 * happens the pte that was hole punched will have been
+		 * unmapped and the radix tree entry will have been removed by
+		 * the time we are called, but the call will still happen.  We
+		 * will return all the way up to wp_pfn_shared(), where the
+		 * pte_same() check will fail, eventually causing page fault
+		 * to be retried by the CPU.
+		 */
+		goto unlock;
+	}
+
+	error = radix_tree_insert(page_tree, index,
+			RADIX_DAX_ENTRY(sector, pmd_entry));
+	if (error)
+		goto unlock;
+
+	mapping->nrexceptional++;
+ dirty:
+	if (dirty)
+		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+	spin_unlock_irq(&mapping->tree_lock);
+	return error;
+}
+
+static int dax_writeback_one(struct block_device *bdev,
+		struct address_space *mapping, pgoff_t index, void *entry)
+{
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+	int type = RADIX_DAX_TYPE(entry);
+	struct radix_tree_node *node;
+	struct blk_dax_ctl dax;
+	void **slot;
+	int ret = 0;
+
+	spin_lock_irq(&mapping->tree_lock);
+	/*
+	 * Regular page slots are stabilized by the page lock even
+	 * without the tree itself locked.  These unlocked entries
+	 * need verification under the tree lock.
+	 */
+	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+		goto unlock;
+	if (*slot != entry)
+		goto unlock;
+
+	/* another fsync thread may have already written back this entry */
+	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto unlock;
+
+	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+		ret = -EIO;
+		goto unlock;
+	}
+
+	dax.sector = RADIX_DAX_SECTOR(entry);
+	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+	spin_unlock_irq(&mapping->tree_lock);
+
+	/*
+	 * We cannot hold tree_lock while calling dax_map_atomic() because it
+	 * eventually calls cond_resched().
+	 */
+	ret = dax_map_atomic(bdev, &dax);
+	if (ret < 0)
+		return ret;
+
+	if (WARN_ON_ONCE(ret < dax.size)) {
+		ret = -EIO;
+		goto unmap;
+	}
+
+	wb_cache_pmem(dax.addr, dax.size);
+
+	spin_lock_irq(&mapping->tree_lock);
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+	spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+	dax_unmap_atomic(bdev, &dax);
+	return ret;
+
+ unlock:
+	spin_unlock_irq(&mapping->tree_lock);
+	return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ */
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+		loff_t end)
+{
+	struct inode *inode = mapping->host;
+	struct block_device *bdev = inode->i_sb->s_bdev;
+	pgoff_t start_index, end_index, pmd_index;
+	pgoff_t indices[PAGEVEC_SIZE];
+	struct pagevec pvec;
+	bool done = false;
+	int i, ret = 0;
+	void *entry;
+
+	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+		return -EIO;
+
+	start_index = start >> PAGE_CACHE_SHIFT;
+	end_index = end >> PAGE_CACHE_SHIFT;
+	pmd_index = DAX_PMD_INDEX(start_index);
+
+	rcu_read_lock();
+	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+	rcu_read_unlock();
+
+	/* see if the start of our range is covered by a PMD entry */
+	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		start_index = pmd_index;
+
+	tag_pages_for_writeback(mapping, start_index, end_index);
+
+	pagevec_init(&pvec, 0);
+	while (!done) {
+		pvec.nr = find_get_entries_tag(mapping, start_index,
+				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+				pvec.pages, indices);
+
+		if (pvec.nr == 0)
+			break;
+
+		for (i = 0; i < pvec.nr; i++) {
+			if (indices[i] > end_index) {
+				done = true;
+				break;
+			}
+
+			ret = dax_writeback_one(bdev, mapping, indices[i],
+					pvec.pages[i]);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	wmb_pmem();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 			struct vm_area_struct *vma, struct vm_fault *vmf)
 {
@@ -363,6 +558,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
 	}
 	dax_unmap_atomic(bdev, &dax);
 
+	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+			vmf->flags & FAULT_FLAG_WRITE);
+	if (error)
+		goto out;
+
 	error = vm_insert_mixed(vma, vaddr, dax.pfn);
 
  out:
@@ -408,6 +608,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	memset(&bh, 0, sizeof(bh));
 	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+	bh.b_bdev = inode->i_sb->s_bdev;
 	bh.b_size = PAGE_SIZE;
 
  repeat:
@@ -487,6 +688,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		delete_from_page_cache(page);
 		unlock_page(page);
 		page_cache_release(page);
+		page = NULL;
 	}
 
 	/*
@@ -590,7 +792,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	struct block_device *bdev;
 	pgoff_t size, pgoff;
 	sector_t block;
-	int result = 0;
+	int error, result = 0;
+	bool alloc = false;
 
 	/* dax pmd mappings require pfn_t_devmap() */
 	if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +827,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	}
 
 	memset(&bh, 0, sizeof(bh));
+	bh.b_bdev = inode->i_sb->s_bdev;
 	block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
 	bh.b_size = PMD_SIZE;
-	if (get_block(inode, block, &bh, write) != 0)
+
+	if (get_block(inode, block, &bh, 0) != 0)
 		return VM_FAULT_SIGBUS;
+
+	if (!buffer_mapped(&bh) && write) {
+		if (get_block(inode, block, &bh, 1) != 0)
+			return VM_FAULT_SIGBUS;
+		alloc = true;
+	}
+
 	bdev = bh.b_bdev;
-	i_mmap_lock_read(mapping);
 
 	/*
 	 * If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +850,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	 */
 	if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
 		dax_pmd_dbg(&bh, address, "allocated block too small");
-		goto fallback;
+		return VM_FAULT_FALLBACK;
 	}
 
 	/*
 	 * If we allocated new storage, make sure no process has any
 	 * zero pages covering this hole
 	 */
-	if (buffer_new(&bh)) {
-		i_mmap_unlock_read(mapping);
-		unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-		i_mmap_lock_read(mapping);
+	if (alloc) {
+		loff_t lstart = pgoff << PAGE_SHIFT;
+		loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+		truncate_pagecache_range(inode, lstart, lend);
 	}
 
+	i_mmap_lock_read(mapping);
+
 	/*
 	 * If a truncate happened while we were allocating blocks, we may
 	 * leave blocks allocated to the file that are beyond EOF.  We can't
@@ -664,7 +878,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		goto out;
 	}
 	if ((pgoff | PG_PMD_COLOUR) >= size) {
-		dax_pmd_dbg(&bh, address, "pgoff unaligned");
+		dax_pmd_dbg(&bh, address,
+				"offset + huge page size > file size");
 		goto fallback;
 	}
 
@@ -732,6 +947,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		}
 		dax_unmap_atomic(bdev, &dax);
 
+		/*
+		 * For PTE faults we insert a radix tree entry for reads, and
+		 * leave it clean.  Then on the first write we dirty the radix
+		 * tree entry via the dax_pfn_mkwrite() path.  This sequence
+		 * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+		 * call into get_block() to translate the pgoff to a sector in
+		 * order to be able to create a new radix tree entry.
+		 *
+		 * The PMD path doesn't have an equivalent to
+		 * dax_pfn_mkwrite(), though, so for a read followed by a
+		 * write we traverse all the way through __dax_pmd_fault()
+		 * twice.  This means we can just skip inserting a radix tree
+		 * entry completely on the initial read and just wait until
+		 * the write to insert a dirty entry.
+		 */
+		if (write) {
+			error = dax_radix_entry(mapping, pgoff, dax.sector,
+					true, true);
+			if (error) {
+				dax_pmd_dbg(&bh, address,
+						"PMD radix insertion failed");
+				goto fallback;
+			}
+		}
+
 		dev_dbg(part_to_dev(bdev->bd_part),
 				"%s: %s addr: %lx pfn: %lx sect: %llx\n",
 				__func__, current->comm, address,
@@ -790,15 +1030,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
  * dax_pfn_mkwrite - handle first write to DAX page
  * @vma: The virtual memory area where the fault occurred
  * @vmf: The description of the fault
- *
  */
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	struct file *file = vma->vm_file;
 
-	sb_start_pagefault(sb);
-	file_update_time(vma->vm_file);
-	sb_end_pagefault(sb);
+	/*
+	 * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+	 * RADIX_DAX_PTE entry already exists in the radix tree from a
+	 * previous call to __dax_fault().  We just want to look up that PTE
+	 * entry using vmf->pgoff and make sure the dirty tag is set.  This
+	 * saves us from having to make a call to get_block() here to look
+	 * up the sector.
+	 */
+	dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1080,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
 	BUG_ON((offset + length) > PAGE_CACHE_SIZE);
 
 	memset(&bh, 0, sizeof(bh));
+	bh.b_bdev = inode->i_sb->s_bdev;
 	bh.b_size = PAGE_CACHE_SIZE;
 	err = get_block(inode, index, &bh, 0);
 	if (err < 0)
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 11a42c5a09ae..2c88d683cd91 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 {
 	struct inode *inode = file_inode(vma->vm_file);
 	struct ext2_inode_info *ei = EXT2_I(inode);
-	int ret = VM_FAULT_NOPAGE;
 	loff_t size;
+	int ret;
 
 	sb_start_pagefault(inode->i_sb);
 	file_update_time(vma->vm_file);
@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (vmf->pgoff >= size)
 		ret = VM_FAULT_SIGBUS;
+	else
+		ret = dax_pfn_mkwrite(vma, vmf);
 
 	up_read(&ei->dax_sem);
 	sb_end_pagefault(inode->i_sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8eb87e3e2752..1126436dada1 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -291,8 +291,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
 {
 	struct inode *inode = file_inode(vma->vm_file);
 	struct super_block *sb = inode->i_sb;
-	int ret = VM_FAULT_NOPAGE;
 	loff_t size;
+	int ret;
 
 	sb_start_pagefault(sb);
 	file_update_time(vma->vm_file);
@@ -300,6 +300,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
 	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (vmf->pgoff >= size)
 		ret = VM_FAULT_SIGBUS;
+	else
+		ret = dax_pfn_mkwrite(vma, vmf);
 	up_read(&EXT4_I(inode)->i_mmap_sem);
 	sb_end_pagefault(sb);
 
diff --git a/fs/inode.c b/fs/inode.c
index bb8685220292..9f62db3bcc3e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
 	 */
 	spin_lock_irq(&inode->i_data.tree_lock);
 	BUG_ON(inode->i_data.nrpages);
-	BUG_ON(inode->i_data.nrshadows);
+	BUG_ON(inode->i_data.nrexceptional);
 	spin_unlock_irq(&inode->i_data.tree_lock);
 	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a3750f902adc..0ae91ad6df2d 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
 #include "nodelist.h"
 
 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
 	return 0;
 
  out_free:
-#ifndef __ECOS
-	if (jffs2_blocks_use_vmalloc(c))
-		vfree(c->blocks);
-	else
-#endif
-		kfree(c->blocks);
+	kvfree(c->blocks);
 
 	return ret;
 }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2caf1682036d..bead25ae8fe4 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 out_root:
 	jffs2_free_ino_caches(c);
 	jffs2_free_raw_node_refs(c);
-	if (jffs2_blocks_use_vmalloc(c))
-		vfree(c->blocks);
-	else
-		kfree(c->blocks);
+	kvfree(c->blocks);
  out_inohash:
 	jffs2_clear_xattr_subsystem(c);
 	kfree(c->inocache_list);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bb080c272149..0a9a114bb9d1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
 
 	jffs2_free_ino_caches(c);
 	jffs2_free_raw_node_refs(c);
-	if (jffs2_blocks_use_vmalloc(c))
-		vfree(c->blocks);
-	else
-		kfree(c->blocks);
+	kvfree(c->blocks);
 	jffs2_flash_cleanup(c);
 	kfree(c->inocache_list);
 	jffs2_clear_xattr_subsystem(c);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index bb1f4e7a3270..3384dc8e6683 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
 	u32 i, j;
 
 	if (fl->commit_through_mds) {
-		nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+		nfs_request_add_commit_list(req, cinfo);
 	} else {
 		/* Note that we are calling nfs4_fl_calc_j_index on each page
 		 * that ends up being committed to a data server.  An attractive
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6594e9f903a0..5bcd92d50e82 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1948,11 +1948,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
 	start = xdr_reserve_space(xdr, 4);
 	BUG_ON(!start);
 
-	if (ff_layout_encode_ioerr(flo, xdr, args))
-		goto out;
-
+	ff_layout_encode_ioerr(flo, xdr, args);
 	ff_layout_encode_iostats(flo, xdr, args);
-out:
+
 	*start = cpu_to_be32((xdr->p - start - 1) * 4);
 	dprintk("%s: Return\n", __func__);
 }
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index bd0327541366..29898a9550fa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
 	err->length = end - err->offset;
 }
 
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err,  u64 offset,
-			       u64 length, int status, enum nfs_opnum4 opnum,
-			       nfs4_stateid *stateid,
-			       struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+		const struct nfs4_ff_layout_ds_err *e2)
 {
-	return err->status == status && err->opnum == opnum &&
-	       nfs4_stateid_match(&err->stateid, stateid) &&
-	       !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
-	       end_offset(err->offset, err->length) >= offset &&
-	       err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
-			   struct nfs4_ff_layout_ds_err *new)
-{
-	if (!ds_error_can_merge(old, new->offset, new->length, new->status,
-				new->opnum, &new->stateid, &new->deviceid))
-		return false;
-
-	extend_ds_error(old, new->offset, new->length);
-	return true;
+	int ret;
+
+	if (e1->opnum != e2->opnum)
+		return e1->opnum < e2->opnum ? -1 : 1;
+	if (e1->status != e2->status)
+		return e1->status < e2->status ? -1 : 1;
+	ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+	if (ret != 0)
+		return ret;
+	ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+	if (ret != 0)
+		return ret;
+	if (end_offset(e1->offset, e1->length) < e2->offset)
+		return -1;
+	if (e1->offset > end_offset(e2->offset, e2->length))
+		return 1;
+	/* If ranges overlap or are contiguous, they are the same */
+	return 0;
 }
 
-static bool
+static void
 ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
 			      struct nfs4_ff_layout_ds_err *dserr)
 {
-	struct nfs4_ff_layout_ds_err *err;
-
-	list_for_each_entry(err, &flo->error_list, list) {
-		if (merge_ds_error(err, dserr)) {
-			return true;
-		}
-	}
-
-	list_add(&dserr->list, &flo->error_list);
-	return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
-			  u64 length, int status, enum nfs_opnum4 opnum,
-			  nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
-	bool found = false;
-	struct nfs4_ff_layout_ds_err *err;
-
-	list_for_each_entry(err, &flo->error_list, list) {
-		if (ds_error_can_merge(err, offset, length, status, opnum,
-				       stateid, deviceid)) {
-			found = true;
-			extend_ds_error(err, offset, length);
+	struct nfs4_ff_layout_ds_err *err, *tmp;
+	struct list_head *head = &flo->error_list;
+	int match;
+
+	/* Do insertion sort w/ merges */
+	list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+		match = ff_ds_error_match(err, dserr);
+		if (match < 0)
+			continue;
+		if (match > 0) {
+			/* Add entry "dserr" _before_ entry "err" */
+			head = &err->list;
 			break;
 		}
+		/* Entries match, so merge "err" into "dserr" */
+		extend_ds_error(dserr, err->offset, err->length);
+		list_del(&err->list);
+		kfree(err);
 	}
 
-	return found;
+	list_add_tail(&dserr->list, head);
 }
 
 int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
 			     gfp_t gfp_flags)
 {
 	struct nfs4_ff_layout_ds_err *dserr;
-	bool needfree;
 
 	if (status == 0)
 		return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
 	if (mirror->mirror_ds == NULL)
 		return -EINVAL;
 
-	spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-	if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
-				      &mirror->stateid,
-				      &mirror->mirror_ds->id_node.deviceid)) {
-		spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-		return 0;
-	}
-	spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
 	dserr = kmalloc(sizeof(*dserr), gfp_flags);
 	if (!dserr)
 		return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
 	       NFS4_DEVICEID4_SIZE);
 
 	spin_lock(&flo->generic_hdr.plh_inode->i_lock);
-	needfree = ff_layout_add_ds_error_locked(flo, dserr);
+	ff_layout_add_ds_error_locked(flo, dserr);
 	spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-	if (needfree)
-		kfree(dserr);
 
 	return 0;
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4e8cc942336c..9a547aa3ec8e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
 		      struct nfs_commit_info *cinfo,
 		      u32 ds_commit_idx);
 void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
 				 struct nfs_commit_info *cinfo);
 void nfs_request_add_commit_list_locked(struct nfs_page *req,
 		struct list_head *dst,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce43cd6d88c6..5754835a2886 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
-			    struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
 	spin_lock(cinfo->lock);
-	nfs_request_add_commit_list_locked(req, dst, cinfo);
+	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
 	spin_unlock(cinfo->lock);
 	nfs_mark_page_unstable(req->wb_page, cinfo);
 }
@@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 {
 	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
 		return;
-	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+	nfs_request_add_commit_list(req, cinfo);
 }
 
 static void
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0fbb4c7c72e8..a522c15a0bfd 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
 {
 	int i;
 	int nr_groups = bitmap->s_nr_groups;
-	int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
-						nr_groups);
 
 	for (i = 0; i < nr_groups; i++)
 		if (bitmap->s_block_bitmap[i])
 			brelse(bitmap->s_block_bitmap[i]);
 
-	if (size <= PAGE_SIZE)
-		kfree(bitmap);
-	else
-		vfree(bitmap);
+	kvfree(bitmap);
 }
 
 static void udf_free_partition(struct udf_part_map *map)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bb2b8f354041..52883ac3cf84 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
 /*
  * pfn_mkwrite was originally inteneded to ensure we capture time stamp
  * updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
  */
 static int
 xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
 	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (vmf->pgoff >= size)
 		ret = VM_FAULT_SIGBUS;
+	else if (IS_DAX(inode))
+		ret = dax_pfn_mkwrite(vma, vmf);
 	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
 	sb_end_pagefault(inode->i_sb);
 	return ret;