aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/block_dev.c | 2
-rw-r--r-- fs/coda/coda_linux.h | 3
-rw-r--r-- fs/dax.c | 274
-rw-r--r-- fs/ext2/file.c | 4
-rw-r--r-- fs/ext4/file.c | 4
-rw-r--r-- fs/inode.c | 2
-rw-r--r-- fs/jffs2/build.c | 8
-rw-r--r-- fs/jffs2/fs.c | 5
-rw-r--r-- fs/jffs2/super.c | 5
-rw-r--r-- fs/nfs/filelayout/filelayout.c | 2
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c | 6
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 99
-rw-r--r-- fs/nfs/internal.h | 2
-rw-r--r-- fs/nfs/write.c | 7
-rw-r--r-- fs/udf/super.c | 7
-rw-r--r-- fs/xfs/xfs_file.c | 7
16 files changed, 325 insertions, 112 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2c3aeab17e20..7b9cd49622b1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
invalidate_bh_lrus();
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index f829fe963f5b..5104d84c4f64 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
} while (0)
-#define CODA_FREE(ptr,size) \
- do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+/* size is still accepted for existing callers but is no longer evaluated. */
+#define CODA_FREE(ptr, size) kvfree((ptr))
/* inode to cnode access functions */
diff --git a/fs/dax.c b/fs/dax.c
index 55aa273145a8..4fd6b0c5c6b5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
@@ -245,6 +246,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
loff_t end = pos + iov_iter_count(iter);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
struct address_space *mapping = inode->i_mapping;
@@ -324,6 +326,199 @@ static int copy_user_bh(struct page *to, struct inode *inode,
return 0;
}
+/*
+ * Sentinel passed to dax_radix_entry() when the caller has no sector to
+ * insert and only wants an existing entry looked up and tagged dirty.
+ */
+#define NO_SECTOR (-1)
+/* Mask a page cache index with PMD_MASK expressed in page cache units. */
+#define DAX_PMD_INDEX(page_index) \
+	((page_index) & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+/*
+ * dax_radix_entry - record or dirty the radix tree entry backing a DAX fault
+ * @mapping: address space whose page_tree holds the DAX entries
+ * @index: page cache index of the fault
+ * @sector: sector to store in a new entry, or NO_SECTOR to insert nothing
+ * @pmd_entry: true when the new entry describes a PMD-sized mapping
+ * @dirty: true when the entry must be tagged PAGECACHE_TAG_DIRTY
+ *
+ * An existing PMD entry found at DAX_PMD_INDEX(@index) is reused. An existing
+ * entry at @index is reused unless it is a PTE entry and @pmd_entry is set, in
+ * which case it is deleted and a new entry is inserted. All tree updates are
+ * done under mapping->tree_lock.
+ *
+ * Returns 0 on success, -EIO if an entry of an unexpected type is found, or
+ * the error returned by radix_tree_insert().
+ */
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+		sector_t sector, bool pmd_entry, bool dirty)
+{
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+	pgoff_t pmd_index = DAX_PMD_INDEX(index);
+	int type, error = 0;
+	void *entry;
+
+	WARN_ON_ONCE(pmd_entry && !dirty);
+	/* A clean entry (read fault) leaves nothing for writeback to flush. */
+	if (dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	/* A PMD entry at the start of the range already covers @index. */
+	entry = radix_tree_lookup(page_tree, pmd_index);
+	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+		index = pmd_index;
+		goto dirty;
+	}
+
+	entry = radix_tree_lookup(page_tree, index);
+	if (entry) {
+		type = RADIX_DAX_TYPE(entry);
+		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+				 type != RADIX_DAX_PMD)) {
+			error = -EIO;
+			goto unlock;
+		}
+
+		if (!pmd_entry || type == RADIX_DAX_PMD)
+			goto dirty;
+
+		/*
+		 * We only insert dirty PMD entries into the radix tree.  This
+		 * means we don't need to worry about removing a dirty PTE
+		 * entry and inserting a clean PMD entry, thus reducing the
+		 * range we would flush with a follow-up fsync/msync call.
+		 */
+		radix_tree_delete(page_tree, index);
+		mapping->nrexceptional--;
+	}
+
+	if (sector == NO_SECTOR) {
+		/*
+		 * This can happen during correct operation if our pfn_mkwrite
+		 * fault raced against a hole punch operation.  If this
+		 * happens the pte that was hole punched will have been
+		 * unmapped and the radix tree entry will have been removed by
+		 * the time we are called, but the call will still happen.  We
+		 * will return all the way up to wp_pfn_shared(), where the
+		 * pte_same() check will fail, eventually causing page fault
+		 * to be retried by the CPU.
+		 */
+		goto unlock;
+	}
+
+	error = radix_tree_insert(page_tree, index,
+			RADIX_DAX_ENTRY(sector, pmd_entry));
+	if (error)
+		goto unlock;
+
+	mapping->nrexceptional++;
+ dirty:
+	if (dirty)
+		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+	spin_unlock_irq(&mapping->tree_lock);
+	return error;
+}
+
+/*
+ * Flush the range described by one radix tree entry: re-validate the entry
+ * under tree_lock, map it with dax_map_atomic(), write it back with
+ * wb_cache_pmem() and then clear its PAGECACHE_TAG_TOWRITE tag.
+ *
+ * Returns a negative errno on failure. On success the value is non-negative:
+ * 0 when nothing had to be flushed, otherwise the dax_map_atomic() result.
+ */
+static int dax_writeback_one(struct block_device *bdev,
+ struct address_space *mapping, pgoff_t index, void *entry)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ int type = RADIX_DAX_TYPE(entry);
+ struct radix_tree_node *node;
+ struct blk_dax_ctl dax;
+ void **slot;
+ int ret = 0;
+
+ spin_lock_irq(&mapping->tree_lock);
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+ goto unlock;
+ /* the slot no longer holds the entry the caller looked up */
+ if (*slot != entry)
+ goto unlock;
+
+ /* another fsync thread may have already written back this entry */
+ if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+ goto unlock;
+
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ /* capture sector and size while the entry is still known to be valid */
+ dax.sector = RADIX_DAX_SECTOR(entry);
+ dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+ spin_unlock_irq(&mapping->tree_lock);
+
+ /*
+ * We cannot hold tree_lock while calling dax_map_atomic() because it
+ * eventually calls cond_resched().
+ */
+ ret = dax_map_atomic(bdev, &dax);
+ if (ret < 0)
+ return ret;
+
+ /* the mapping returned is shorter than the range the entry covers */
+ if (WARN_ON_ONCE(ret < dax.size)) {
+ ret = -EIO;
+ goto unmap;
+ }
+
+ wb_cache_pmem(dax.addr, dax.size);
+
+ spin_lock_irq(&mapping->tree_lock);
+ radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+ spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+ dax_unmap_atomic(bdev, &dax);
+ return ret;
+
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ *
+ * Returns 0 on success, -EIO if inode->i_blkbits differs from PAGE_SHIFT, or
+ * the first negative error returned by dax_writeback_one().
+ */
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+		loff_t end)
+{
+	struct inode *inode = mapping->host;
+	struct block_device *bdev = inode->i_sb->s_bdev;
+	pgoff_t start_index, end_index, pmd_index;
+	pgoff_t indices[PAGEVEC_SIZE];
+	struct pagevec pvec;
+	bool done = false;
+	int i, ret = 0;
+	void *entry;
+
+	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+		return -EIO;
+
+	start_index = start >> PAGE_CACHE_SHIFT;
+	end_index = end >> PAGE_CACHE_SHIFT;
+	pmd_index = DAX_PMD_INDEX(start_index);
+
+	rcu_read_lock();
+	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+	rcu_read_unlock();
+
+	/* see if the start of our range is covered by a PMD entry */
+	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		start_index = pmd_index;
+
+	tag_pages_for_writeback(mapping, start_index, end_index);
+
+	pagevec_init(&pvec, 0);
+	while (!done) {
+		pvec.nr = find_get_entries_tag(mapping, start_index,
+				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+				pvec.pages, indices);
+
+		if (pvec.nr == 0)
+			break;
+
+		for (i = 0; i < pvec.nr; i++) {
+			if (indices[i] > end_index) {
+				done = true;
+				break;
+			}
+
+			ret = dax_writeback_one(bdev, mapping, indices[i],
+					pvec.pages[i]);
+			if (ret < 0)
+				return ret;
+		}
+
+		/*
+		 * Resume the next lookup after the last entry of this batch,
+		 * so an entry that dax_writeback_one() left tagged is not
+		 * found and rescanned again on every pass.
+		 */
+		start_index = indices[pvec.nr - 1] + 1;
+	}
+	/* fence the flushes issued by wb_cache_pmem() above */
+	wmb_pmem();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
@@ -363,6 +558,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
}
dax_unmap_atomic(bdev, &dax);
+ error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+ vmf->flags & FAULT_FLAG_WRITE);
+ if (error)
+ goto out;
+
error = vm_insert_mixed(vma, vaddr, dax.pfn);
out:
@@ -408,6 +608,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
memset(&bh, 0, sizeof(bh));
block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_SIZE;
repeat:
@@ -487,6 +688,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
delete_from_page_cache(page);
unlock_page(page);
page_cache_release(page);
+ page = NULL;
}
/*
@@ -590,7 +792,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
struct block_device *bdev;
pgoff_t size, pgoff;
sector_t block;
- int result = 0;
+ int error, result = 0;
+ bool alloc = false;
/* dax pmd mappings require pfn_t_devmap() */
if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +827,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
bh.b_size = PMD_SIZE;
- if (get_block(inode, block, &bh, write) != 0)
+
+ if (get_block(inode, block, &bh, 0) != 0)
return VM_FAULT_SIGBUS;
+
+ if (!buffer_mapped(&bh) && write) {
+ if (get_block(inode, block, &bh, 1) != 0)
+ return VM_FAULT_SIGBUS;
+ alloc = true;
+ }
+
bdev = bh.b_bdev;
- i_mmap_lock_read(mapping);
/*
* If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +850,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
*/
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
dax_pmd_dbg(&bh, address, "allocated block too small");
- goto fallback;
+ return VM_FAULT_FALLBACK;
}
/*
* If we allocated new storage, make sure no process has any
* zero pages covering this hole
*/
- if (buffer_new(&bh)) {
- i_mmap_unlock_read(mapping);
- unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
- i_mmap_lock_read(mapping);
+ if (alloc) {
+ loff_t lstart = pgoff << PAGE_SHIFT;
+ loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+ truncate_pagecache_range(inode, lstart, lend);
}
+ i_mmap_lock_read(mapping);
+
/*
* If a truncate happened while we were allocating blocks, we may
* leave blocks allocated to the file that are beyond EOF. We can't
@@ -664,7 +878,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
goto out;
}
if ((pgoff | PG_PMD_COLOUR) >= size) {
- dax_pmd_dbg(&bh, address, "pgoff unaligned");
+ dax_pmd_dbg(&bh, address,
+ "offset + huge page size > file size");
goto fallback;
}
@@ -732,6 +947,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
dax_unmap_atomic(bdev, &dax);
+ /*
+ * For PTE faults we insert a radix tree entry for reads, and
+ * leave it clean. Then on the first write we dirty the radix
+ * tree entry via the dax_pfn_mkwrite() path. This sequence
+ * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+ * call into get_block() to translate the pgoff to a sector in
+ * order to be able to create a new radix tree entry.
+ *
+ * The PMD path doesn't have an equivalent to
+ * dax_pfn_mkwrite(), though, so for a read followed by a
+ * write we traverse all the way through __dax_pmd_fault()
+ * twice. This means we can just skip inserting a radix tree
+ * entry completely on the initial read and just wait until
+ * the write to insert a dirty entry.
+ */
+ if (write) {
+ error = dax_radix_entry(mapping, pgoff, dax.sector,
+ true, true);
+ if (error) {
+ dax_pmd_dbg(&bh, address,
+ "PMD radix insertion failed");
+ goto fallback;
+ }
+ }
+
dev_dbg(part_to_dev(bdev->bd_part),
"%s: %s addr: %lx pfn: %lx sect: %llx\n",
__func__, current->comm, address,
@@ -790,15 +1030,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
* dax_pfn_mkwrite - handle first write to DAX page
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
- *
*/
int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct file *file = vma->vm_file;
+ int error;
- sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
- sb_end_pagefault(sb);
+ /*
+ * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+ * RADIX_DAX_PTE entry already exists in the radix tree from a
+ * previous call to __dax_fault(). We just want to look up that PTE
+ * entry using vmf->pgoff and make sure the dirty tag is set. This
+ * saves us from having to make a call to get_block() here to look
+ * up the sector.
+ */
+ error = dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false,
+ true);
+
+ /* Do not report a successful write fault if the entry was not dirtied. */
+ if (error == -ENOMEM)
+ return VM_FAULT_OOM;
+ if (error)
+ return VM_FAULT_SIGBUS;
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1080,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
BUG_ON((offset + length) > PAGE_CACHE_SIZE);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_CACHE_SIZE;
err = get_block(inode, index, &bh, 0);
if (err < 0)
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 11a42c5a09ae..2c88d683cd91 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
{
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
- int ret = VM_FAULT_NOPAGE;
loff_t size;
+ int ret;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8eb87e3e2752..1126436dada1 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -291,8 +291,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
{
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
- int ret = VM_FAULT_NOPAGE;
loff_t size;
+ int ret;
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
@@ -300,6 +300,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
diff --git a/fs/inode.c b/fs/inode.c
index bb8685220292..9f62db3bcc3e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
*/
spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
- BUG_ON(inode->i_data.nrshadows);
+ BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a3750f902adc..0ae91ad6df2d 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
#include "nodelist.h"
static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
return 0;
out_free:
-#ifndef __ECOS
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
-#endif
- kfree(c->blocks);
+ kvfree(c->blocks);
return ret;
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2caf1682036d..bead25ae8fe4 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
out_root:
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
out_inohash:
jffs2_clear_xattr_subsystem(c);
kfree(c->inocache_list);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bb080c272149..0a9a114bb9d1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
jffs2_flash_cleanup(c);
kfree(c->inocache_list);
jffs2_clear_xattr_subsystem(c);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index bb1f4e7a3270..3384dc8e6683 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
u32 i, j;
if (fl->commit_through_mds) {
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
} else {
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6594e9f903a0..5bcd92d50e82 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1948,11 +1948,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
start = xdr_reserve_space(xdr, 4);
BUG_ON(!start);
- if (ff_layout_encode_ioerr(flo, xdr, args))
- goto out;
-
+ ff_layout_encode_ioerr(flo, xdr, args);
ff_layout_encode_iostats(flo, xdr, args);
-out:
+
*start = cpu_to_be32((xdr->p - start - 1) * 4);
dprintk("%s: Return\n", __func__);
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index bd0327541366..29898a9550fa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
err->length = end - err->offset;
}
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid,
- struct nfs4_deviceid *deviceid)
+/*
+ * Three-way comparison of two DS error records, ordered by opnum, status,
+ * stateid, deviceid and finally byte range.
+ *
+ * Returns a negative value if e1 sorts before e2, a positive value if it
+ * sorts after, and 0 if all keys are equal and the ranges overlap or touch.
+ */
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+ const struct nfs4_ff_layout_ds_err *e2)
{
- return err->status == status && err->opnum == opnum &&
- nfs4_stateid_match(&err->stateid, stateid) &&
- !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
- end_offset(err->offset, err->length) >= offset &&
- err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
- struct nfs4_ff_layout_ds_err *new)
-{
- if (!ds_error_can_merge(old, new->offset, new->length, new->status,
- new->opnum, &new->stateid, &new->deviceid))
- return false;
-
- extend_ds_error(old, new->offset, new->length);
- return true;
+ int ret;
+
+ if (e1->opnum != e2->opnum)
+ return e1->opnum < e2->opnum ? -1 : 1;
+ if (e1->status != e2->status)
+ return e1->status < e2->status ? -1 : 1;
+ ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+ if (ret != 0)
+ return ret;
+ ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+ if (ret != 0)
+ return ret;
+ /* e1 ends strictly before e2 starts */
+ if (end_offset(e1->offset, e1->length) < e2->offset)
+ return -1;
+ /* e1 starts strictly after e2 ends */
+ if (e1->offset > end_offset(e2->offset, e2->length))
+ return 1;
+ /* If ranges overlap or are contiguous, they are the same */
+ return 0;
}
+/*
+ * Insert "dserr" into flo->error_list, keeping the list ordered according to
+ * ff_ds_error_match(). Every existing entry that matches "dserr" is folded
+ * into it, unlinked and freed, so "dserr" is always linked into the list
+ * when this returns.
+ *
+ * The visible caller holds the inode's i_lock around this call.
+ */
-static bool
+static void
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
struct nfs4_ff_layout_ds_err *dserr)
{
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (merge_ds_error(err, dserr)) {
- return true;
- }
- }
-
- list_add(&dserr->list, &flo->error_list);
- return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
- bool found = false;
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (ds_error_can_merge(err, offset, length, status, opnum,
- stateid, deviceid)) {
- found = true;
- extend_ds_error(err, offset, length);
+ struct nfs4_ff_layout_ds_err *err, *tmp;
+ struct list_head *head = &flo->error_list;
+ int match;
+
+ /* Do insertion sort w/ merges */
+ list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+ match = ff_ds_error_match(err, dserr);
+ if (match < 0)
+ continue;
+ if (match > 0) {
+ /* Add entry "dserr" _before_ entry "err" */
+ head = &err->list;
break;
}
+ /* Entries match, so merge "err" into "dserr" */
+ extend_ds_error(dserr, err->offset, err->length);
+ list_del(&err->list);
+ kfree(err);
}
- return found;
+ /* "head" is still the list head if no later-sorting entry was found */
+ list_add_tail(&dserr->list, head);
}
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
gfp_t gfp_flags)
{
struct nfs4_ff_layout_ds_err *dserr;
- bool needfree;
if (status == 0)
return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
if (mirror->mirror_ds == NULL)
return -EINVAL;
- spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
- &mirror->stateid,
- &mirror->mirror_ds->id_node.deviceid)) {
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- return 0;
- }
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
dserr = kmalloc(sizeof(*dserr), gfp_flags);
if (!dserr)
return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
NFS4_DEVICEID4_SIZE);
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- needfree = ff_layout_add_ds_error_locked(flo, dserr);
+ ff_layout_add_ds_error_locked(flo, dserr);
spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- if (needfree)
- kfree(dserr);
return 0;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4e8cc942336c..9a547aa3ec8e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
struct nfs_commit_info *cinfo,
u32 ds_commit_idx);
void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
struct nfs_commit_info *cinfo);
void nfs_request_add_commit_list_locked(struct nfs_page *req,
struct list_head *dst,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce43cd6d88c6..5754835a2886 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
* holding the nfs_page lock.
*/
void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
- struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
spin_lock(cinfo->lock);
- nfs_request_add_commit_list_locked(req, dst, cinfo);
+ /* the destination is always the MDS commit list of cinfo */
+ nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
spin_unlock(cinfo->lock);
nfs_mark_page_unstable(req->wb_page, cinfo);
}
@@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
{
if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
return;
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
}
static void
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0fbb4c7c72e8..a522c15a0bfd 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
{
int i;
int nr_groups = bitmap->s_nr_groups;
- int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
- nr_groups);
for (i = 0; i < nr_groups; i++)
if (bitmap->s_block_bitmap[i])
brelse(bitmap->s_block_bitmap[i]);
- if (size <= PAGE_SIZE)
- kfree(bitmap);
- else
- vfree(bitmap);
+ kvfree(bitmap);
}
static void udf_free_partition(struct udf_part_map *map)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bb2b8f354041..52883ac3cf84 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
/*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp
* updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
*/
static int
xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else if (IS_DAX(inode))
+ ret = dax_pfn_mkwrite(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;