diff options
author | Linus Torvalds | 2019-12-02 14:46:22 -0800 |
---|---|---|
committer | Linus Torvalds | 2019-12-02 14:46:22 -0800 |
commit | 97eeb4d9d755605385fa329da9afa38729f3413c (patch) | |
tree | fc63d9f43fc7235a9fe5cfaf03d73ec03dc5f2a6 | |
parent | 9b326948c23908692d7dfe56ed149840d3829eaa (diff) | |
parent | 8feb4732ff9f2732354b44c4418569974e2f949c (diff) |
Merge tag 'xfs-5.5-merge-16' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull XFS updates from Darrick Wong:
"For this release, we changed quite a few things.
Highlights:
- Fixed some long tail latency problems in the block allocator
- Removed some long deprecated (and for the past several years no-op)
mount options and ioctls
- Strengthened the extended attribute and directory verifiers
- Audited and fixed all the places where we could return EFSCORRUPTED
without logging anything
- Refactored the old SGI space allocation ioctls to make the
equivalent fallocate calls
- Fixed a race between fallocate and directio
- Fixed an integer overflow when files have more than a few
billion(!) extents
- Fixed a longstanding bug where quota accounting could be incorrect
when performing unwritten extent conversion on a freshly mounted fs
- Fixed various complaints in scrub about soft lockups and
unresponsiveness to signals
- De-vtable'd the directory handling code, which should make it
faster
- Converted to the new mount api, for better or for worse
- Cleaned up some memory leaks
and quite a lot of other smaller fixes and cleanups.
A more detailed summary:
- Fill out the build string
- Prevent inode fork extent count overflows
- Refactor the allocator to reduce long tail latency
- Rework incore log locking a little to reduce spinning
- Break up the xfs_iomap_begin functions into smaller more cohesive
parts
- Fix allocation alignment being dropped too early when the
allocation request is for more blocks than an AG is large
- Other small cleanups
- Clean up file buftarg retrieval helpers
- Hoist the resvsp and unresvsp ioctls to the vfs
- Remove the undocumented biosize mount option, since it has never
been mentioned as existing or supported on linux
- Clean up some of the mount option printing and parsing
- Enhance attr leaf verifier to check block structure
- Check dirent and attr names for invalid characters before passing
them to the vfs
- Refactor open-coded bmbt walking
- Fix a few places where we return EIO instead of EFSCORRUPTED after
failing metadata sanity checks
- Fix a synchronization problem between fallocate and aio dio
corrupting the file length
- Clean up various loose ends in the iomap and bmap code
- Convert to the new mount api
- Make sure we always log something when returning EFSCORRUPTED
- Fix some problems where long running scrub loops could trigger soft
lockup warnings and/or fail to exit due to fatal signals pending
- Fix various Coverity complaints
- Remove most of the function pointers from the directory code to
reduce indirection penalties
- Ensure that dquots are attached to the inode when performing
unwritten extent conversion after io
- Deuglify incore projid and crtime types
- Fix another AGI/AGF locking order deadlock when renaming
- Clean up some quota typedefs
- Remove the FSSETDM ioctls which haven't done anything in 20 years
- Fix some memory leaks when mounting the log fails
- Fix an underflow when updating an xattr leaf freemap
- Remove some trivial wrappers
- Report metadata corruption as an error, not a (potentially) fatal
assertion
- Clean up the dir/attr buffer mapping code
- Allow fatal signals to kill scrub during parent pointer checks"
* tag 'xfs-5.5-merge-16' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (198 commits)
xfs: allow parent directory scans to be interrupted with fatal signals
xfs: remove the mappedbno argument to xfs_da_get_buf
xfs: remove the mappedbno argument to xfs_da_read_buf
xfs: split xfs_da3_node_read
xfs: remove the mappedbno argument to xfs_dir3_leafn_read
xfs: remove the mappedbno argument to xfs_dir3_leaf_read
xfs: remove the mappedbno argument to xfs_attr3_leaf_read
xfs: remove the mappedbno argument to xfs_da_reada_buf
xfs: improve the xfs_dabuf_map calling conventions
xfs: refactor xfs_dabuf_map
xfs: simplify mappedbno handling in xfs_da_{get,read}_buf
xfs: report corruption only as a regular error
xfs: Remove kmem_zone_free() wrapper
xfs: Remove kmem_zone_destroy() wrapper
xfs: Remove slab init wrappers
xfs: fix attr leaf header freemap.size underflow
xfs: fix some memory leaks in log recovery
xfs: fix another missing include
xfs: remove XFS_IOC_FSSETDM and XFS_IOC_FSSETDM_BY_HANDLE
xfs: remove duplicated include from xfs_dir2_data.c
...
126 files changed, 5823 insertions, 6266 deletions
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9ae90d728c0f..358ea2ecf36b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -185,15 +185,27 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, /* handled by some ->ioctl(); always a pointer to int */ case FIONREAD: goto found_handler; - /* these two get messy on amd64 due to alignment differences */ + /* these get messy on amd64 due to alignment differences */ #if defined(CONFIG_X86_64) case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: - error = compat_ioctl_preallocate(f.file, compat_ptr(arg)); + error = compat_ioctl_preallocate(f.file, 0, compat_ptr(arg)); + goto out_fput; + case FS_IOC_UNRESVSP_32: + case FS_IOC_UNRESVSP64_32: + error = compat_ioctl_preallocate(f.file, FALLOC_FL_PUNCH_HOLE, + compat_ptr(arg)); + goto out_fput; + case FS_IOC_ZERO_RANGE_32: + error = compat_ioctl_preallocate(f.file, FALLOC_FL_ZERO_RANGE, + compat_ptr(arg)); goto out_fput; #else case FS_IOC_RESVSP: case FS_IOC_RESVSP64: + case FS_IOC_UNRESVSP: + case FS_IOC_UNRESVSP64: + case FS_IOC_ZERO_RANGE: goto found_handler; #endif diff --git a/fs/ioctl.c b/fs/ioctl.c index 812061ba667a..2f5e4e5b97e1 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -467,7 +467,7 @@ EXPORT_SYMBOL(generic_block_fiemap); * Only the l_start, l_len and l_whence fields of the 'struct space_resv' * are used here, rest are ignored. */ -int ioctl_preallocate(struct file *filp, void __user *argp) +int ioctl_preallocate(struct file *filp, int mode, void __user *argp) { struct inode *inode = file_inode(filp); struct space_resv sr; @@ -488,13 +488,14 @@ int ioctl_preallocate(struct file *filp, void __user *argp) return -EINVAL; } - return vfs_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); + return vfs_fallocate(filp, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, + sr.l_len); } /* on ia32 l_start is on a 32-bit boundary */ #if defined CONFIG_COMPAT && defined(CONFIG_X86_64) /* just account for different alignment */ -int compat_ioctl_preallocate(struct file *file, +int compat_ioctl_preallocate(struct file *file, int mode, struct space_resv_32 __user *argp) { struct inode *inode = file_inode(file); @@ -516,7 +517,7 @@ int compat_ioctl_preallocate(struct file *file, return -EINVAL; } - return vfs_fallocate(file, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); + return vfs_fallocate(file, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } #endif @@ -533,7 +534,12 @@ static int file_ioctl(struct file *filp, unsigned int cmd, return put_user(i_size_read(inode) - filp->f_pos, p); case FS_IOC_RESVSP: case FS_IOC_RESVSP64: - return ioctl_preallocate(filp, p); + return ioctl_preallocate(filp, 0, p); + case FS_IOC_UNRESVSP: + case FS_IOC_UNRESVSP64: + return ioctl_preallocate(filp, FALLOC_FL_PUNCH_HOLE, p); + case FS_IOC_ZERO_RANGE: + return ioctl_preallocate(filp, FALLOC_FL_ZERO_RANGE, p); } return vfs_ioctl(filp, cmd, arg); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 06b68b6115bc..aceca2f9a3db 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -27,7 +27,6 @@ xfs-y += $(addprefix libxfs/, \ xfs_bmap_btree.o \ xfs_btree.o \ xfs_da_btree.o \ - xfs_da_format.o \ xfs_defer.o \ xfs_dir2.o \ xfs_dir2_block.o \ diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index da031b93e182..1da94237a8cf 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -32,7 +32,7 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) /* - * __vmalloc() will allocate data pages and auxillary structures (e.g. + * __vmalloc() will allocate data pages and auxiliary structures (e.g. * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context here. Hence * we need to tell memory reclaim that we are in such a context via * PF_MEMALLOC_NOFS to prevent memory reclaim re-entering the filesystem here diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 8170d95cf930..6143117770e9 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -78,39 +78,9 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) * Zone interfaces */ -#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN -#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT -#define KM_ZONE_SPREAD SLAB_MEM_SPREAD -#define KM_ZONE_ACCOUNT SLAB_ACCOUNT - #define kmem_zone kmem_cache #define kmem_zone_t struct kmem_cache -static inline kmem_zone_t * -kmem_zone_init(int size, char *zone_name) -{ - return kmem_cache_create(zone_name, size, 0, 0, NULL); -} - -static inline kmem_zone_t * -kmem_zone_init_flags(int size, char *zone_name, slab_flags_t flags, - void (*construct)(void *)) -{ - return kmem_cache_create(zone_name, size, 0, flags, construct); -} - -static inline void -kmem_zone_free(kmem_zone_t *zone, void *ptr) -{ - kmem_cache_free(zone, ptr); -} - -static inline void -kmem_zone_destroy(kmem_zone_t *zone) -{ - kmem_cache_destroy(zone); -} - extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); static inline void * diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 87a9747f1d36..fdfe6dc0d307 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -19,6 +19,8 @@ #include "xfs_btree.h" #include "xfs_refcount_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_sb.h" +#include "xfs_ag_resv.h" /* * Per-AG Block Reservations diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 533b04aaf6f6..c284e10af491 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -146,9 +146,13 @@ xfs_alloc_lookup_eq( xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { + int error; + cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); + error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); + cur->bc_private.a.priv.abt.active = (*stat == 1); + return error; } /* @@ -162,9 +166,13 @@ xfs_alloc_lookup_ge( xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { + int error; + cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); + error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); + cur->bc_private.a.priv.abt.active = (*stat == 1); + return error; } /* @@ -178,9 +186,19 @@ xfs_alloc_lookup_le( xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { + int error; cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; - return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); + error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); + cur->bc_private.a.priv.abt.active = (*stat == 1); + return error; +} + +static inline bool +xfs_alloc_cur_active( + struct xfs_btree_cur *cur) +{ + return cur && cur->bc_private.a.priv.abt.active; } /* @@ -313,7 +331,7 @@ xfs_alloc_compute_diff( xfs_extlen_t newlen1=0; /* length with newbno1 */ xfs_extlen_t newlen2=0; /* length with newbno2 */ xfs_agblock_t wantend; /* end of target extent */ - bool userdata = xfs_alloc_is_userdata(datatype); + bool userdata = datatype & XFS_ALLOC_USERDATA; ASSERT(freelen >= wantlen); freeend = freebno + freelen; @@ -433,13 +451,17 @@ xfs_alloc_fixup_trees( #ifdef DEBUG if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, - i == 1 && nfbno1 == fbno && nflen1 == flen); + if (XFS_IS_CORRUPT(mp, + i != 1 || + nfbno1 != fbno || + nflen1 != flen)) + return -EFSCORRUPTED; #endif } else { if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } /* * Look up the record in the by-block tree if necessary. @@ -448,13 +470,17 @@ xfs_alloc_fixup_trees( #ifdef DEBUG if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, - i == 1 && nfbno1 == fbno && nflen1 == flen); + if (XFS_IS_CORRUPT(mp, + i != 1 || + nfbno1 != fbno || + nflen1 != flen)) + return -EFSCORRUPTED; #endif } else { if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } #ifdef DEBUG @@ -465,8 +491,10 @@ xfs_alloc_fixup_trees( bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); - XFS_WANT_CORRUPTED_RETURN(mp, - bnoblock->bb_numrecs == cntblock->bb_numrecs); + if (XFS_IS_CORRUPT(mp, + bnoblock->bb_numrecs != + cntblock->bb_numrecs)) + return -EFSCORRUPTED; } #endif @@ -496,25 +524,30 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_btree_delete(cnt_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; /* * Add new by-size btree entry(s). */ if (nfbno1 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 0); + if (XFS_IS_CORRUPT(mp, i != 0)) + return -EFSCORRUPTED; if ((error = xfs_btree_insert(cnt_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } if (nfbno2 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 0); + if (XFS_IS_CORRUPT(mp, i != 0)) + return -EFSCORRUPTED; if ((error = xfs_btree_insert(cnt_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } /* * Fix up the by-block btree entry(s). @@ -525,7 +558,8 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_btree_delete(bno_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } else { /* * Update the by-block entry to start later|be shorter. @@ -539,10 +573,12 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 0); + if (XFS_IS_CORRUPT(mp, i != 0)) + return -EFSCORRUPTED; if ((error = xfs_btree_insert(bno_cur, &i))) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } return 0; } @@ -684,16 +720,298 @@ xfs_alloc_update_counters( xfs_trans_agblocks_delta(tp, len); if (unlikely(be32_to_cpu(agf->agf_freeblks) > - be32_to_cpu(agf->agf_length))) + be32_to_cpu(agf->agf_length))) { + xfs_buf_corruption_error(agbp); return -EFSCORRUPTED; + } xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); return 0; } /* - * Allocation group level functions. + * Block allocation algorithm and data structures. + */ +struct xfs_alloc_cur { + struct xfs_btree_cur *cnt; /* btree cursors */ + struct xfs_btree_cur *bnolt; + struct xfs_btree_cur *bnogt; + xfs_extlen_t cur_len;/* current search length */ + xfs_agblock_t rec_bno;/* extent startblock */ + xfs_extlen_t rec_len;/* extent length */ + xfs_agblock_t bno; /* alloc bno */ + xfs_extlen_t len; /* alloc len */ + xfs_extlen_t diff; /* diff from search bno */ + unsigned int busy_gen;/* busy state */ + bool busy; +}; + +/* + * Set up cursors, etc. in the extent allocation cursor. This function can be + * called multiple times to reset an initialized structure without having to + * reallocate cursors. + */ +static int +xfs_alloc_cur_setup( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur) +{ + int error; + int i; + + ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO); + + acur->cur_len = args->maxlen; + acur->rec_bno = 0; + acur->rec_len = 0; + acur->bno = 0; + acur->len = 0; + acur->diff = -1; + acur->busy = false; + acur->busy_gen = 0; + + /* + * Perform an initial cntbt lookup to check for availability of maxlen + * extents. If this fails, we'll return -ENOSPC to signal the caller to + * attempt a small allocation. + */ + if (!acur->cnt) + acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_CNT); + error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i); + if (error) + return error; + + /* + * Allocate the bnobt left and right search cursors. + */ + if (!acur->bnolt) + acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO); + if (!acur->bnogt) + acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO); + return i == 1 ? 0 : -ENOSPC; +} + +static void +xfs_alloc_cur_close( + struct xfs_alloc_cur *acur, + bool error) +{ + int cur_error = XFS_BTREE_NOERROR; + + if (error) + cur_error = XFS_BTREE_ERROR; + + if (acur->cnt) + xfs_btree_del_cursor(acur->cnt, cur_error); + if (acur->bnolt) + xfs_btree_del_cursor(acur->bnolt, cur_error); + if (acur->bnogt) + xfs_btree_del_cursor(acur->bnogt, cur_error); + acur->cnt = acur->bnolt = acur->bnogt = NULL; +} + +/* + * Check an extent for allocation and track the best available candidate in the + * allocation structure. The cursor is deactivated if it has entered an out of + * range state based on allocation arguments. Optionally return the extent + * extent geometry and allocation status if requested by the caller. + */ +static int +xfs_alloc_cur_check( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur, + struct xfs_btree_cur *cur, + int *new) +{ + int error, i; + xfs_agblock_t bno, bnoa, bnew; + xfs_extlen_t len, lena, diff = -1; + bool busy; + unsigned busy_gen = 0; + bool deactivate = false; + bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO; + + *new = 0; + + error = xfs_alloc_get_rec(cur, &bno, &len, &i); + if (error) + return error; + if (XFS_IS_CORRUPT(args->mp, i != 1)) + return -EFSCORRUPTED; + + /* + * Check minlen and deactivate a cntbt cursor if out of acceptable size + * range (i.e., walking backwards looking for a minlen extent). + */ + if (len < args->minlen) { + deactivate = !isbnobt; + goto out; + } + + busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena, + &busy_gen); + acur->busy |= busy; + if (busy) + acur->busy_gen = busy_gen; + /* deactivate a bnobt cursor outside of locality range */ + if (bnoa < args->min_agbno || bnoa > args->max_agbno) { + deactivate = isbnobt; + goto out; + } + if (lena < args->minlen) + goto out; + + args->len = XFS_EXTLEN_MIN(lena, args->maxlen); + xfs_alloc_fix_len(args); + ASSERT(args->len >= args->minlen); + if (args->len < acur->len) + goto out; + + /* + * We have an aligned record that satisfies minlen and beats or matches + * the candidate extent size. Compare locality for near allocation mode. + */ + ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO); + diff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, args->datatype, + bnoa, lena, &bnew); + if (bnew == NULLAGBLOCK) + goto out; + + /* + * Deactivate a bnobt cursor with worse locality than the current best. + */ + if (diff > acur->diff) { + deactivate = isbnobt; + goto out; + } + + ASSERT(args->len > acur->len || + (args->len == acur->len && diff <= acur->diff)); + acur->rec_bno = bno; + acur->rec_len = len; + acur->bno = bnew; + acur->len = args->len; + acur->diff = diff; + *new = 1; + + /* + * We're done if we found a perfect allocation. This only deactivates + * the current cursor, but this is just an optimization to terminate a + * cntbt search that otherwise runs to the edge of the tree. + */ + if (acur->diff == 0 && acur->len == args->maxlen) + deactivate = true; +out: + if (deactivate) + cur->bc_private.a.priv.abt.active = false; + trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff, + *new); + return 0; +} + +/* + * Complete an allocation of a candidate extent. Remove the extent from both + * trees and update the args structure. */ +STATIC int +xfs_alloc_cur_finish( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur) +{ + int error; + + ASSERT(acur->cnt && acur->bnolt); + ASSERT(acur->bno >= acur->rec_bno); + ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len); + ASSERT(acur->rec_bno + acur->rec_len <= + be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + + error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno, + acur->rec_len, acur->bno, acur->len, 0); + if (error) + return error; + + args->agbno = acur->bno; + args->len = acur->len; + args->wasfromfl = 0; + + trace_xfs_alloc_cur(args); + return 0; +} + +/* + * Locality allocation lookup algorithm. This expects a cntbt cursor and uses + * bno optimized lookup to search for extents with ideal size and locality. + */ +STATIC int +xfs_alloc_cntbt_iter( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur) +{ + struct xfs_btree_cur *cur = acur->cnt; + xfs_agblock_t bno; + xfs_extlen_t len, cur_len; + int error; + int i; + + if (!xfs_alloc_cur_active(cur)) + return 0; + + /* locality optimized lookup */ + cur_len = acur->cur_len; + error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i); + if (error) + return error; + if (i == 0) + return 0; + error = xfs_alloc_get_rec(cur, &bno, &len, &i); + if (error) + return error; + + /* check the current record and update search length from it */ + error = xfs_alloc_cur_check(args, acur, cur, &i); + if (error) + return error; + ASSERT(len >= acur->cur_len); + acur->cur_len = len; + + /* + * We looked up the first record >= [agbno, len] above. The agbno is a + * secondary key and so the current record may lie just before or after + * agbno. If it is past agbno, check the previous record too so long as + * the length matches as it may be closer. Don't check a smaller record + * because that could deactivate our cursor. + */ + if (bno > args->agbno) { + error = xfs_btree_decrement(cur, 0, &i); + if (!error && i) { + error = xfs_alloc_get_rec(cur, &bno, &len, &i); + if (!error && i && len == acur->cur_len) + error = xfs_alloc_cur_check(args, acur, cur, + &i); + } + if (error) + return error; + } + + /* + * Increment the search key until we find at least one allocation + * candidate or if the extent we found was larger. Otherwise, double the + * search key to optimize the search. Efficiency is more important here + * than absolute best locality. + */ + cur_len <<= 1; + if (!acur->len || acur->cur_len >= cur_len) + acur->cur_len++; + else + acur->cur_len = cur_len; + + return error; +} /* * Deal with the case where only small freespaces remain. Either return the @@ -727,7 +1045,10 @@ xfs_alloc_ag_vextent_small( error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); if (error) goto error; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error); + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + error = -EFSCORRUPTED; + goto error; + } goto out; } @@ -744,13 +1065,13 @@ xfs_alloc_ag_vextent_small( goto out; xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, - xfs_alloc_allow_busy_reuse(args->datatype)); + (args->datatype & XFS_ALLOC_NOBUSY)); - if (xfs_alloc_is_userdata(args->datatype)) { + if (args->datatype & XFS_ALLOC_USERDATA) { struct xfs_buf *bp; bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); - if (!bp) { + if (XFS_IS_CORRUPT(args->mp, !bp)) { error = -EFSCORRUPTED; goto error; } @@ -758,9 +1079,12 @@ xfs_alloc_ag_vextent_small( } *fbnop = args->agbno = fbno; *flenp = args->len = 1; - XFS_WANT_CORRUPTED_GOTO(args->mp, - fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), - error); + if (XFS_IS_CORRUPT(args->mp, + fbno >= be32_to_cpu( + XFS_BUF_TO_AGF(args->agbp)->agf_length))) { + error = -EFSCORRUPTED; + goto error; + } args->wasfromfl = 1; trace_xfs_alloc_small_freelist(args); @@ -915,7 +1239,10 @@ xfs_alloc_ag_vextent_exact( error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } ASSERT(fbno <= args->agbno); /* @@ -984,98 +1311,243 @@ error0: } /* - * Search the btree in a given direction via the search cursor and compare - * the records found against the good extent we've already found. + * Search a given number of btree records in a given direction. Check each + * record against the good extent we've already found. */ STATIC int -xfs_alloc_find_best_extent( - struct xfs_alloc_arg *args, /* allocation argument structure */ - struct xfs_btree_cur **gcur, /* good cursor */ - struct xfs_btree_cur **scur, /* searching cursor */ - xfs_agblock_t gdiff, /* difference for search comparison */ - xfs_agblock_t *sbno, /* extent found by search */ - xfs_extlen_t *slen, /* extent length */ - xfs_agblock_t *sbnoa, /* aligned extent found by search */ - xfs_extlen_t *slena, /* aligned extent length */ - int dir) /* 0 = search right, 1 = search left */ +xfs_alloc_walk_iter( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur, + struct xfs_btree_cur *cur, + bool increment, + bool find_one, /* quit on first candidate */ + int count, /* rec count (-1 for infinite) */ + int *stat) { - xfs_agblock_t new; - xfs_agblock_t sdiff; int error; int i; - unsigned busy_gen; - /* The good extent is perfect, no need to search. */ - if (!gdiff) - goto out_use_good; + *stat = 0; /* - * Look until we find a better one, run out of space or run off the end. + * Search so long as the cursor is active or we find a better extent. + * The cursor is deactivated if it extends beyond the range of the + * current allocation candidate. */ - do { - error = xfs_alloc_get_rec(*scur, sbno, slen, &i); + while (xfs_alloc_cur_active(cur) && count) { + error = xfs_alloc_cur_check(args, acur, cur, &i); if (error) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, *sbno, *slen, - sbnoa, slena, &busy_gen); + return error; + if (i == 1) { + *stat = 1; + if (find_one) + break; + } + if (!xfs_alloc_cur_active(cur)) + break; + + if (increment) + error = xfs_btree_increment(cur, 0, &i); + else + error = xfs_btree_decrement(cur, 0, &i); + if (error) + return error; + if (i == 0) + cur->bc_private.a.priv.abt.active = false; + + if (count > 0) + count--; + } + + return 0; +} + +/* + * Search the by-bno and by-size btrees in parallel in search of an extent with + * ideal locality based on the NEAR mode ->agbno locality hint. + */ +STATIC int +xfs_alloc_ag_vextent_locality( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur, + int *stat) +{ + struct xfs_btree_cur *fbcur = NULL; + int error; + int i; + bool fbinc; + + ASSERT(acur->len == 0); + ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO); + + *stat = 0; + + error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i); + if (error) + return error; + error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i); + if (error) + return error; + error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i); + if (error) + return error; + + /* + * Search the bnobt and cntbt in parallel. Search the bnobt left and + * right and lookup the closest extent to the locality hint for each + * extent size key in the cntbt. The entire search terminates + * immediately on a bnobt hit because that means we've found best case + * locality. Otherwise the search continues until the cntbt cursor runs + * off the end of the tree. If no allocation candidate is found at this + * point, give up on locality, walk backwards from the end of the cntbt + * and take the first available extent. + * + * The parallel tree searches balance each other out to provide fairly + * consistent performance for various situations. The bnobt search can + * have pathological behavior in the worst case scenario of larger + * allocation requests and fragmented free space. On the other hand, the + * bnobt is able to satisfy most smaller allocation requests much more + * quickly than the cntbt. The cntbt search can sift through fragmented + * free space and sets of free extents for larger allocation requests + * more quickly than the bnobt. Since the locality hint is just a hint + * and we don't want to scan the entire bnobt for perfect locality, the + * cntbt search essentially bounds the bnobt search such that we can + * find good enough locality at reasonable performance in most cases. + */ + while (xfs_alloc_cur_active(acur->bnolt) || + xfs_alloc_cur_active(acur->bnogt) || + xfs_alloc_cur_active(acur->cnt)) { + + trace_xfs_alloc_cur_lookup(args); /* - * The good extent is closer than this one. + * Search the bnobt left and right. In the case of a hit, finish + * the search in the opposite direction and we're done. */ - if (!dir) { - if (*sbnoa > args->max_agbno) - goto out_use_good; - if (*sbnoa >= args->agbno + gdiff) - goto out_use_good; - } else { - if (*sbnoa < args->min_agbno) - goto out_use_good; - if (*sbnoa <= args->agbno - gdiff) - goto out_use_good; + error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false, + true, 1, &i); + if (error) + return error; + if (i == 1) { + trace_xfs_alloc_cur_left(args); + fbcur = acur->bnogt; + fbinc = true; + break; + } + error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true, + 1, &i); + if (error) + return error; + if (i == 1) { + trace_xfs_alloc_cur_right(args); + fbcur = acur->bnolt; + fbinc = false; + break; } /* - * Same distance, compare length and pick the best. + * Check the extent with best locality based on the current + * extent size search key and keep track of the best candidate. */ - if (*slena >= args->minlen) { - args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); - xfs_alloc_fix_len(args); - - sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, - args->datatype, *sbnoa, - *slena, &new); + error = xfs_alloc_cntbt_iter(args, acur); + if (error) + return error; + if (!xfs_alloc_cur_active(acur->cnt)) { + trace_xfs_alloc_cur_lookup_done(args); + break; + } + } - /* - * Choose closer size and invalidate other cursor. - */ - if (sdiff < gdiff) - goto out_use_search; - goto out_use_good; + /* + * If we failed to find anything due to busy extents, return empty + * handed so the caller can flush and retry. If no busy extents were + * found, walk backwards from the end of the cntbt as a last resort. + */ + if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) { + error = xfs_btree_decrement(acur->cnt, 0, &i); + if (error) + return error; + if (i) { + acur->cnt->bc_private.a.priv.abt.active = true; + fbcur = acur->cnt; + fbinc = false; } + } - if (!dir) - error = xfs_btree_increment(*scur, 0, &i); - else - error = xfs_btree_decrement(*scur, 0, &i); + /* + * Search in the opposite direction for a better entry in the case of + * a bnobt hit or walk backwards from the end of the cntbt. + */ + if (fbcur) { + error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1, + &i); if (error) - goto error0; - } while (i); + return error; + } -out_use_good: - xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); - *scur = NULL; - return 0; + if (acur->len) + *stat = 1; -out_use_search: - xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); - *gcur = NULL; return 0; +} -error0: - /* caller invalidates cursors */ - return error; +/* Check the last block of the cnt btree for allocations. */ +static int +xfs_alloc_ag_vextent_lastblock( + struct xfs_alloc_arg *args, + struct xfs_alloc_cur *acur, + xfs_agblock_t *bno, + xfs_extlen_t *len, + bool *allocated) +{ + int error; + int i; + +#ifdef DEBUG + /* Randomly don't execute the first algorithm. */ + if (prandom_u32() & 1) + return 0; +#endif + + /* + * Start from the entry that lookup found, sequence through all larger + * free blocks. If we're actually pointing at a record smaller than + * maxlen, go to the start of this block, and skip all those smaller + * than minlen. + */ + if (len || args->alignment > 1) { + acur->cnt->bc_ptrs[0] = 1; + do { + error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); + if (error) + return error; + if (XFS_IS_CORRUPT(args->mp, i != 1)) + return -EFSCORRUPTED; + if (*len >= args->minlen) + break; + error = xfs_btree_increment(acur->cnt, 0, &i); + if (error) + return error; + } while (i); + ASSERT(*len >= args->minlen); + if (!i) + return 0; + } + + error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i); + if (error) + return error; + + /* + * It didn't work. We COULD be in a case where there's a good record + * somewhere, so try again. + */ + if (acur->len == 0) + return 0; + + trace_xfs_alloc_near_first(args); + *allocated = true; + return 0; } /* @@ -1084,41 +1556,17 @@ error0: * and of the form k * prod + mod unless there's nothing that large. * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. */ -STATIC int /* error */ +STATIC int xfs_alloc_ag_vextent_near( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args) { - xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ - xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ - xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ - xfs_agblock_t gtbno; /* start bno of right side entry */ - xfs_agblock_t gtbnoa; /* aligned ... */ - xfs_extlen_t gtdiff; /* difference to right side entry */ - xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena; /* aligned ... */ - xfs_agblock_t gtnew; /* useful start bno of right side */ - int error; /* error code */ - int i; /* result code, temporary */ - int j; /* result code, temporary */ - xfs_agblock_t ltbno; /* start bno of left side entry */ - xfs_agblock_t ltbnoa; /* aligned ... */ - xfs_extlen_t ltdiff; /* difference to left side entry */ - xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena; /* aligned ... */ - xfs_agblock_t ltnew; /* useful start bno of left side */ - xfs_extlen_t rlen; /* length of returned extent */ - bool busy; - unsigned busy_gen; -#ifdef DEBUG - /* - * Randomly don't execute the first algorithm. - */ - int dofirst; /* set to do first algorithm */ - - dofirst = prandom_u32() & 1; -#endif + struct xfs_alloc_cur acur = {}; + int error; /* error code */ + int i; /* result code, temporary */ + xfs_agblock_t bno; + xfs_extlen_t len; - /* handle unitialized agbno range so caller doesn't have to */ + /* handle uninitialized agbno range so caller doesn't have to */ if (!args->min_agbno && !args->max_agbno) args->max_agbno = args->mp->m_sb.sb_agblocks - 1; ASSERT(args->min_agbno <= args->max_agbno); @@ -1130,40 +1578,27 @@ xfs_alloc_ag_vextent_near( args->agbno = args->max_agbno; restart: - bno_cur_lt = NULL; - bno_cur_gt = NULL; - ltlen = 0; - gtlena = 0; - ltlena = 0; - busy = false; - - /* - * Get a cursor for the by-size btree. - */ - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT); + len = 0; /* - * See if there are any free extents as big as maxlen. + * Set up cursors and see if there are any free extents as big as + * maxlen. If not, pick the last entry in the tree unless the tree is + * empty. */ - if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) - goto error0; - /* - * If none, then pick up the last entry in the tree unless the - * tree is empty. - */ - if (!i) { - if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, - <len, &i))) - goto error0; - if (i == 0 || ltlen == 0) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + error = xfs_alloc_cur_setup(args, &acur); + if (error == -ENOSPC) { + error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno, + &len, &i); + if (error) + goto out; + if (i == 0 || len == 0) { trace_xfs_alloc_near_noentry(args); - return 0; + goto out; } ASSERT(i == 1); + } else if (error) { + goto out; } - args->wasfromfl = 0; /* * First algorithm. @@ -1172,311 +1607,47 @@ restart: * near the right edge of the tree. If it's in the last btree leaf * block, then we just examine all the entries in that block * that are big enough, and pick the best one. - * This is written as a while loop so we can break out of it, - * but we never loop back to the top. */ - while (xfs_btree_islastblock(cnt_cur, 0)) { - xfs_extlen_t bdiff; - int besti=0; - xfs_extlen_t blen=0; - xfs_agblock_t bnew=0; - -#ifdef DEBUG - if (dofirst) - break; -#endif - /* - * Start from the entry that lookup found, sequence through - * all larger free blocks. If we're actually pointing at a - * record smaller than maxlen, go to the start of this block, - * and skip all those smaller than minlen. - */ - if (ltlen || args->alignment > 1) { - cnt_cur->bc_ptrs[0] = 1; - do { - if ((error = xfs_alloc_get_rec(cnt_cur, <bno, - <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - if (ltlen >= args->minlen) - break; - if ((error = xfs_btree_increment(cnt_cur, 0, &i))) - goto error0; - } while (i); - ASSERT(ltlen >= args->minlen); - if (!i) - break; - } - i = cnt_cur->bc_ptrs[0]; - for (j = 1, blen = 0, bdiff = 0; - !error && j && (blen < args->maxlen || bdiff > 0); - error = xfs_btree_increment(cnt_cur, 0, &j)) { - /* - * For each entry, decide if it's better than - * the previous best entry. - */ - if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena, &busy_gen); - if (ltlena < args->minlen) - continue; - if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) - continue; - args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); - xfs_alloc_fix_len(args); - ASSERT(args->len >= args->minlen); - if (args->len < blen) - continue; - ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->datatype, ltbnoa, - ltlena, <new); - if (ltnew != NULLAGBLOCK && - (args->len > blen || ltdiff < bdiff)) { - bdiff = ltdiff; - bnew = ltnew; - blen = args->len; - besti = cnt_cur->bc_ptrs[0]; - } - } - /* - * It didn't work. We COULD be in a case where - * there's a good record somewhere, so try again. - */ - if (blen == 0) - break; - /* - * Point at the best entry, and retrieve it again. - */ - cnt_cur->bc_ptrs[0] = besti; - if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - args->len = blen; - - /* - * We are allocating starting at bnew for blen blocks. - */ - args->agbno = bnew; - ASSERT(bnew >= ltbno); - ASSERT(bnew + blen <= ltbno + ltlen); - /* - * Set up a cursor for the by-bno tree. - */ - bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO); - /* - * Fix up the btree entries. - */ - if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, - ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) - goto error0; - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); + if (xfs_btree_islastblock(acur.cnt, 0)) { + bool allocated = false; - trace_xfs_alloc_near_first(args); - return 0; - } - /* - * Second algorithm. - * Search in the by-bno tree to the left and to the right - * simultaneously, until in each case we find a space big enough, - * or run into the edge of the tree. When we run into the edge, - * we deallocate that cursor. - * If both searches succeed, we compare the two spaces and pick - * the better one. - * With alignment, it's possible for both to fail; the upper - * level algorithm that picks allocation groups for allocations - * is not supposed to do this. - */ - /* - * Allocate and initialize the cursor for the leftward search. - */ - bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); - /* - * Lookup <= bno to find the leftward search's starting point. - */ - if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) - goto error0; - if (!i) { - /* - * Didn't find anything; use this cursor for the rightward - * search. - */ - bno_cur_gt = bno_cur_lt; - bno_cur_lt = NULL; - } - /* - * Found something. Duplicate the cursor for the rightward search. - */ - else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) - goto error0; - /* - * Increment the cursor, so we will point at the entry just right - * of the leftward entry if any, or to the leftmost entry. - */ - if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) - goto error0; - if (!i) { - /* - * It failed, there are no rightward entries. - */ - xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); - bno_cur_gt = NULL; + error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len, + &allocated); + if (error) + goto out; + if (allocated) + goto alloc_finish; } - /* - * Loop going left with the leftward cursor, right with the - * rightward cursor, until either both directions give up or - * we find an entry at least as big as minlen. - */ - do { - if (bno_cur_lt) { - if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena, &busy_gen); - if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) - break; - if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) - goto error0; - if (!i || ltbnoa < args->min_agbno) { - xfs_btree_del_cursor(bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - } - } - if (bno_cur_gt) { - if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, - >bnoa, >lena, &busy_gen); - if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) - break; - if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) - goto error0; - if (!i || gtbnoa > args->max_agbno) { - xfs_btree_del_cursor(bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - } - } - } while (bno_cur_lt || bno_cur_gt); /* - * Got both cursors still active, need to find better entry. + * Second algorithm. Combined cntbt and bnobt search to find ideal + * locality. */ - if (bno_cur_lt && bno_cur_gt) { - if (ltlena >= args->minlen) { - /* - * Left side is good, look for a right side entry. - */ - args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); - xfs_alloc_fix_len(args); - ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->datatype, ltbnoa, - ltlena, <new); - - error = xfs_alloc_find_best_extent(args, - &bno_cur_lt, &bno_cur_gt, - ltdiff, >bno, >len, - >bnoa, >lena, - 0 /* search right */); - } else { - ASSERT(gtlena >= args->minlen); - - /* - * Right side is good, look for a left side entry. - */ - args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); - xfs_alloc_fix_len(args); - gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->datatype, gtbnoa, - gtlena, >new); - - error = xfs_alloc_find_best_extent(args, - &bno_cur_gt, &bno_cur_lt, - gtdiff, <bno, <len, - <bnoa, <lena, - 1 /* search left */); - } - - if (error) - goto error0; - } + error = xfs_alloc_ag_vextent_locality(args, &acur, &i); + if (error) + goto out; /* * If we couldn't get anything, give up. */ - if (bno_cur_lt == NULL && bno_cur_gt == NULL) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - - if (busy) { + if (!acur.len) { + if (acur.busy) { trace_xfs_alloc_near_busy(args); - xfs_extent_busy_flush(args->mp, args->pag, busy_gen); + xfs_extent_busy_flush(args->mp, args->pag, + acur.busy_gen); goto restart; } trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; - return 0; + goto out; } - /* - * At this point we have selected a freespace entry, either to the - * left or to the right. If it's on the right, copy all the - * useful variables to the "left" set so we only have one - * copy of this code. - */ - if (bno_cur_gt) { - bno_cur_lt = bno_cur_gt; - bno_cur_gt = NULL; - ltbno = gtbno; - ltbnoa = gtbnoa; - ltlen = gtlen; - ltlena = gtlena; - j = 1; - } else - j = 0; - - /* - * Fix up the length and compute the useful address. - */ - args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); - xfs_alloc_fix_len(args); - rlen = args->len; - (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, - args->datatype, ltbnoa, ltlena, <new); - ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltbnoa + ltlena); - ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno); - args->agbno = ltnew; - - if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, - ltnew, rlen, XFSA_FIXUP_BNO_OK))) - goto error0; - - if (j) - trace_xfs_alloc_near_greater(args); - else - trace_xfs_alloc_near_lesser(args); - - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - return 0; +alloc_finish: + /* fix up btrees on a successful allocation */ + error = xfs_alloc_cur_finish(args, &acur); - error0: - trace_xfs_alloc_near_error(args); - if (cnt_cur != NULL) - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); - if (bno_cur_lt != NULL) - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); - if (bno_cur_gt != NULL) - xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); +out: + xfs_alloc_cur_close(&acur, error); return error; } @@ -1545,7 +1716,10 @@ restart: error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); @@ -1579,8 +1753,13 @@ restart: * This can't happen in the second case above. */ rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); - XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || - (rlen <= flen && rbno + rlen <= fbno + flen), error0); + if (XFS_IS_CORRUPT(args->mp, + rlen != 0 && + (rlen > flen || + rbno + rlen > fbno + flen))) { + error = -EFSCORRUPTED; + goto error0; + } if (rlen < args->maxlen) { xfs_agblock_t bestfbno; xfs_extlen_t bestflen; @@ -1599,15 +1778,22 @@ restart: if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if (flen < bestrlen) break; busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); - XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || - (rlen <= flen && rbno + rlen <= fbno + flen), - error0); + if (XFS_IS_CORRUPT(args->mp, + rlen != 0 && + (rlen > flen || + rbno + rlen > fbno + flen))) { + error = -EFSCORRUPTED; + goto error0; + } if (rlen > bestrlen) { bestrlen = rlen; bestrbno = rbno; @@ -1620,7 +1806,10 @@ restart: if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } rlen = bestrlen; rbno = bestrbno; flen = bestflen; @@ -1643,7 +1832,10 @@ restart: xfs_alloc_fix_len(args); rlen = args->len; - XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); + if (XFS_IS_CORRUPT(args->mp, rlen > flen)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Allocate and initialize a cursor for the by-block tree. */ @@ -1657,10 +1849,13 @@ restart: cnt_cur = bno_cur = NULL; args->len = rlen; args->agbno = rbno; - XFS_WANT_CORRUPTED_GOTO(args->mp, - args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), - error0); + if (XFS_IS_CORRUPT(args->mp, + args->agbno + args->len > + be32_to_cpu( + XFS_BUF_TO_AGF(args->agbp)->agf_length))) { + error = -EFSCORRUPTED; + goto error0; + } trace_xfs_alloc_size_done(args); return 0; @@ -1732,7 +1927,10 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * It's not contiguous, though. */ @@ -1744,8 +1942,10 @@ xfs_free_ag_extent( * space was invalid, it's (partly) already free. * Very bad. */ - XFS_WANT_CORRUPTED_GOTO(mp, - ltbno + ltlen <= bno, error0); + if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) { + error = -EFSCORRUPTED; + goto error0; + } } } /* @@ -1760,7 +1960,10 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * It's not contiguous, though. */ @@ -1772,7 +1975,10 @@ xfs_free_ag_extent( * space was invalid, it's (partly) already free. * Very bad. */ - XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0); + if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) { + error = -EFSCORRUPTED; + goto error0; + } } } /* @@ -1789,31 +1995,49 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Delete the old by-size entry on the right. */ if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Delete the old by-block entry for the right block. */ if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Move the by-block cursor back to the left neighbor. */ if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } #ifdef DEBUG /* * Check that this is the right record: delete didn't @@ -1826,9 +2050,13 @@ xfs_free_ag_extent( if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, - i == 1 && xxbno == ltbno && xxlen == ltlen, - error0); + if (XFS_IS_CORRUPT(mp, + i != 1 || + xxbno != ltbno || + xxlen != ltlen)) { + error = -EFSCORRUPTED; + goto error0; + } } #endif /* @@ -1849,17 +2077,26 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Back up the by-block cursor to the left neighbor, and * update its length. */ if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } nbno = ltbno; nlen = len + ltlen; if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) @@ -1875,10 +2112,16 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Update the starting block and length of the right * neighbor in the by-block tree. @@ -1897,7 +2140,10 @@ xfs_free_ag_extent( nlen = len; if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); bno_cur = NULL; @@ -1906,10 +2152,16 @@ xfs_free_ag_extent( */ if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto error0; + } if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; @@ -1989,7 +2241,8 @@ xfs_alloc_longest_free_extent( * reservations and AGFL rules in place, we can return this extent. */ if (pag->pagf_longest > delta) - return pag->pagf_longest - delta; + return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable, + pag->pagf_longest - delta); /* Otherwise, let the caller try for 1 block if there's space. */ return pag->pagf_flcount > 0 || pag->pagf_longest > 0; @@ -2087,7 +2340,7 @@ xfs_free_agfl_block( return error; bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); - if (!bp) + if (XFS_IS_CORRUPT(tp->t_mountp, !bp)) return -EFSCORRUPTED; xfs_trans_binval(tp, bp); @@ -2253,7 +2506,7 @@ xfs_alloc_fix_freelist( * somewhere else if we are not being asked to try harder at this * point */ - if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) && + if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) && (flags & XFS_ALLOC_FLAG_TRYLOCK)) { ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); goto out_agbp_relse; @@ -2956,13 +3209,6 @@ xfs_alloc_vextent( args->len); #endif - /* Zero the extent if we were asked to do so */ - if (args->datatype & XFS_ALLOC_USERDATA_ZERO) { - error = xfs_zero_extent(args->ip, args->fsbno, args->len); - if (error) - goto error0; - } - } xfs_perag_put(args->pag); return 0; @@ -3038,12 +3284,18 @@ __xfs_free_extent( if (error) return error; - XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err); + if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { + error = -EFSCORRUPTED; + goto err; + } /* validate the extent size is legal now we have the agf locked */ - XFS_WANT_CORRUPTED_GOTO(mp, - agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), - err); + if (XFS_IS_CORRUPT(mp, + agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) { + error = -EFSCORRUPTED; + goto err; + } error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); if (error) diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index d6ed5d2c07c2..7380fbe4a3ff 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -54,7 +54,6 @@ typedef struct xfs_alloc_arg { struct xfs_mount *mp; /* file system mount point */ struct xfs_buf *agbp; /* buffer for a.g. freelist header */ struct xfs_perag *pag; /* per-ag struct for this agno */ - struct xfs_inode *ip; /* for userdata zeroing method */ xfs_fsblock_t fsbno; /* file system block number */ xfs_agnumber_t agno; /* allocation group number */ xfs_agblock_t agbno; /* allocation group-relative block # */ @@ -83,20 +82,7 @@ typedef struct xfs_alloc_arg { */ #define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ #define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ -#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ -#define XFS_ALLOC_NOBUSY (1 << 3)/* Busy extents not allowed */ - -static inline bool -xfs_alloc_is_userdata(int datatype) -{ - return (datatype & ~XFS_ALLOC_NOBUSY) != 0; -} - -static inline bool -xfs_alloc_allow_busy_reuse(int datatype) -{ - return (datatype & XFS_ALLOC_NOBUSY) == 0; -} +#define XFS_ALLOC_NOBUSY (1 << 2)/* Busy extents not allowed */ /* freespace limit calculations */ #define XFS_ALLOC_AGFL_RESERVE 4 diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 2a94543857a1..279694d73e4e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -507,6 +507,7 @@ xfs_allocbt_init_cursor( cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; + cur->bc_private.a.priv.abt.active = false; if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 510ca6974604..0d7fcc983b3d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -589,7 +589,7 @@ xfs_attr_leaf_addname( */ dp = args->dp; args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); if (error) return error; @@ -715,7 +715,7 @@ xfs_attr_leaf_addname( * remove the "old" attr from that block (neat, huh!) */ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, - -1, &bp); + &bp); if (error) return error; @@ -769,7 +769,7 @@ xfs_attr_leaf_removename( */ dp = args->dp; args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); if (error) return error; @@ -813,7 +813,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args) trace_xfs_attr_leaf_get(args); args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); if (error) return error; @@ -1173,7 +1173,7 @@ xfs_attr_node_removename( ASSERT(state->path.blk[0].bp); state->path.blk[0].bp = NULL; - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); if (error) goto out; @@ -1266,10 +1266,9 @@ xfs_attr_refillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->disk_blkno) { - error = xfs_da3_node_read(state->args->trans, - state->args->dp, - blk->blkno, blk->disk_blkno, - &blk->bp, XFS_ATTR_FORK); + error = xfs_da3_node_read_mapped(state->args->trans, + state->args->dp, blk->disk_blkno, + &blk->bp, XFS_ATTR_FORK); if (error) return error; } else { @@ -1285,10 +1284,9 @@ xfs_attr_refillstate(xfs_da_state_t *state) ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); for (blk = path->blk, level = 0; level < path->active; blk++, level++) { if (blk->disk_blkno) { - error = xfs_da3_node_read(state->args->trans, - state->args->dp, - blk->blkno, blk->disk_blkno, - &blk->bp, XFS_ATTR_FORK); + error = xfs_da3_node_read_mapped(state->args->trans, + state->args->dp, blk->disk_blkno, + &blk->bp, XFS_ATTR_FORK); if (error) return error; } else { diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index f0089e862216..08d4b10ae2d5 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -233,6 +233,61 @@ xfs_attr3_leaf_hdr_to_disk( } static xfs_failaddr_t +xfs_attr3_leaf_verify_entry( + struct xfs_mount *mp, + char *buf_end, + struct xfs_attr_leafblock *leaf, + struct xfs_attr3_icleaf_hdr *leafhdr, + struct xfs_attr_leaf_entry *ent, + int idx, + __u32 *last_hashval) +{ + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + char *name_end; + unsigned int nameidx; + unsigned int namesize; + __u32 hashval; + + /* hash order check */ + hashval = be32_to_cpu(ent->hashval); + if (hashval < *last_hashval) + return __this_address; + *last_hashval = hashval; + + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < leafhdr->firstused || nameidx >= mp->m_attr_geo->blksize) + return __this_address; + + /* + * Check the name information. The namelen fields are u8 so we can't + * possibly exceed the maximum name length of 255 bytes. + */ + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = xfs_attr3_leaf_name_local(leaf, idx); + namesize = xfs_attr_leaf_entsize_local(lentry->namelen, + be16_to_cpu(lentry->valuelen)); + name_end = (char *)lentry + namesize; + if (lentry->namelen == 0) + return __this_address; + } else { + rentry = xfs_attr3_leaf_name_remote(leaf, idx); + namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); + name_end = (char *)rentry + namesize; + if (rentry->namelen == 0) + return __this_address; + if (!(ent->flags & XFS_ATTR_INCOMPLETE) && + rentry->valueblk == 0) + return __this_address; + } + + if (name_end > buf_end) + return __this_address; + + return NULL; +} + +static xfs_failaddr_t xfs_attr3_leaf_verify( struct xfs_buf *bp) { @@ -240,7 +295,10 @@ xfs_attr3_leaf_verify( struct xfs_mount *mp = bp->b_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *entries; + struct xfs_attr_leaf_entry *ent; + char *buf_end; uint32_t end; /* must be 32bit - see below */ + __u32 last_hashval = 0; int i; xfs_failaddr_t fa; @@ -273,8 +331,13 @@ xfs_attr3_leaf_verify( (char *)bp->b_addr + ichdr.firstused) return __this_address; - /* XXX: need to range check rest of attr header values */ - /* XXX: hash order check? */ + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; + for (i = 0, ent = entries; i < ichdr.count; ent++, i++) { + fa = xfs_attr3_leaf_verify_entry(mp, buf_end, leaf, &ichdr, + ent, i, &last_hashval); + if (fa) + return fa; + } /* * Quickly check the freemap information. Attribute data has to be @@ -367,13 +430,12 @@ xfs_attr3_leaf_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mappedbno, struct xfs_buf **bpp) { int err; - err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, - XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); + err = xfs_da_read_buf(tp, dp, bno, 0, bpp, XFS_ATTR_FORK, + &xfs_attr3_leaf_buf_ops); if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); return err; @@ -453,13 +515,15 @@ xfs_attr_copy_value( * special case for dev/uuid inodes, they have fixed size data forks. */ int -xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) +xfs_attr_shortform_bytesfit( + struct xfs_inode *dp, + int bytes) { - int offset; - int minforkoff; /* lower limit on valid forkoff locations */ - int maxforkoff; /* upper limit on valid forkoff locations */ - int dsize; - xfs_mount_t *mp = dp->i_mount; + struct xfs_mount *mp = dp->i_mount; + int64_t dsize; + int minforkoff; + int maxforkoff; + int offset; /* rounded down */ offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; @@ -525,7 +589,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) * A data fork btree root must have space for at least * MINDBTPTRS key/ptr pairs if the data fork is small or empty. */ - minforkoff = max(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); + minforkoff = max_t(int64_t, dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); minforkoff = roundup(minforkoff, 8) >> 3; /* attr fork btree root can have at least this many key/ptr pairs */ @@ -764,7 +828,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) } /* - * Retreive the attribute value and length. + * Retrieve the attribute value and length. * * If ATTR_KERNOVAL is specified, only the length needs to be returned. * Unlike a lookup, we only return an error if the attribute does not @@ -924,7 +988,7 @@ xfs_attr_shortform_verify( char *endp; struct xfs_ifork *ifp; int i; - int size; + int64_t size; ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); @@ -1080,7 +1144,6 @@ xfs_attr3_leaf_to_node( struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr icleafhdr; struct xfs_attr_leaf_entry *entries; - struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr icnodehdr; struct xfs_da_intnode *node; struct xfs_inode *dp = args->dp; @@ -1095,11 +1158,11 @@ xfs_attr3_leaf_to_node( error = xfs_da_grow_inode(args, &blkno); if (error) goto out; - error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1); + error = xfs_attr3_leaf_read(args->trans, dp, 0, &bp1); if (error) goto out; - error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK); + error = xfs_da_get_buf(args->trans, dp, blkno, &bp2, XFS_ATTR_FORK); if (error) goto out; @@ -1120,18 +1183,17 @@ xfs_attr3_leaf_to_node( if (error) goto out; node = bp1->b_addr; - dp->d_ops->node_hdr_from_disk(&icnodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(mp, &icnodehdr, node); leaf = bp2->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); /* both on-disk, don't endian-flip twice */ - btree[0].hashval = entries[icleafhdr.count - 1].hashval; - btree[0].before = cpu_to_be32(blkno); + icnodehdr.btree[0].hashval = entries[icleafhdr.count - 1].hashval; + icnodehdr.btree[0].before = cpu_to_be32(blkno); icnodehdr.count = 1; - dp->d_ops->node_hdr_to_disk(node, &icnodehdr); + xfs_da3_node_hdr_to_disk(dp->i_mount, node, &icnodehdr); xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1); error = 0; out: @@ -1161,7 +1223,7 @@ xfs_attr3_leaf_create( trace_xfs_attr_leaf_create(args); - error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, + error = xfs_da_get_buf(args->trans, args->dp, blkno, &bp, XFS_ATTR_FORK); if (error) return error; @@ -1447,7 +1509,9 @@ xfs_attr3_leaf_add_work( for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { if (ichdr->freemap[i].base == tmp) { ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); - ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t); + ichdr->freemap[i].size -= + min_t(uint16_t, ichdr->freemap[i].size, + sizeof(xfs_attr_leaf_entry_t)); } } ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); @@ -1931,7 +1995,7 @@ xfs_attr3_leaf_toosmall( if (blkno == 0) continue; error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, - blkno, -1, &bp); + blkno, &bp); if (error) return error; @@ -2281,8 +2345,10 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - if (ichdr.count >= args->geo->blksize / 8) + if (ichdr.count >= args->geo->blksize / 8) { + xfs_buf_corruption_error(bp); return -EFSCORRUPTED; + } /* * Binary search. (note: small blocks will skip this loop) @@ -2298,10 +2364,14 @@ xfs_attr3_leaf_lookup_int( else break; } - if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { + xfs_buf_corruption_error(bp); return -EFSCORRUPTED; - if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) + } + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { + xfs_buf_corruption_error(bp); return -EFSCORRUPTED; + } /* * Since we may have duplicate hashval's, find the first matching @@ -2661,7 +2731,7 @@ xfs_attr3_leaf_clearflag( /* * Set up the operation. */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); if (error) return error; @@ -2728,7 +2798,7 @@ xfs_attr3_leaf_setflag( /* * Set up the operation. */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); if (error) return error; @@ -2790,7 +2860,7 @@ xfs_attr3_leaf_flipflags( /* * Read the block containing the "old" attr */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp1); if (error) return error; @@ -2799,7 +2869,7 @@ xfs_attr3_leaf_flipflags( */ if (args->blkno2 != args->blkno) { error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2, - -1, &bp2); + &bp2); if (error) return error; } else { diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 7b74e18becff..f4a188e28b7b 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -17,6 +17,29 @@ struct xfs_inode; struct xfs_trans; /* + * Incore version of the attribute leaf header. + */ +struct xfs_attr3_icleaf_hdr { + uint32_t forw; + uint32_t back; + uint16_t magic; + uint16_t count; + uint16_t usedbytes; + /* + * Firstused is 32-bit here instead of 16-bit like the on-disk variant + * to support maximum fsb size of 64k without overflow issues throughout + * the attr code. Instead, the overflow condition is handled on + * conversion to/from disk. + */ + uint32_t firstused; + __u8 holes; + struct { + uint16_t base; + uint16_t size; + } freemap[XFS_ATTR_LEAF_MAPSIZE]; +}; + +/* * Used to keep a list of "remote value" extents when unlinking an inode. */ typedef struct xfs_attr_inactive_list { @@ -67,8 +90,8 @@ int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); -void xfs_attr3_leaf_list_int(struct xfs_buf *bp, - struct xfs_attr_list_context *context); +int xfs_attr3_leaf_list_int(struct xfs_buf *bp, + struct xfs_attr_list_context *context); /* * Routines used for shrinking the Btree. @@ -85,8 +108,7 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, struct xfs_buf *leaf2_bp); int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local); int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mappedbno, - struct xfs_buf **bpp); + xfs_dablk_t bno, struct xfs_buf **bpp); void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo, struct xfs_attr3_icleaf_hdr *to, struct xfs_attr_leafblock *from); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 3e39b7d40f25..a6ef5df42669 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -19,6 +19,7 @@ #include "xfs_trans.h" #include "xfs_bmap.h" #include "xfs_attr.h" +#include "xfs_attr_remote.h" #include "xfs_trace.h" #include "xfs_error.h" diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c index 7071ff98fdbc..40ce5f3094d1 100644 --- a/fs/xfs/libxfs/xfs_bit.c +++ b/fs/xfs/libxfs/xfs_bit.c @@ -5,6 +5,7 @@ */ #include "xfs.h" #include "xfs_log_format.h" +#include "xfs_bit.h" /* * XFS bit manipulation routines, used in non-realtime code. diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ef75e223cb70..4acc6e37c31d 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -384,8 +384,10 @@ xfs_bmap_check_leaf_extents( xfs_check_block(block, mp, 0, 0); pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(mp, - xfs_verify_fsbno(mp, bno), error0); + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) { + error = -EFSCORRUPTED; + goto error0; + } if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -612,8 +614,8 @@ xfs_bmap_btree_to_extents( pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); #ifdef DEBUG - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, - xfs_btree_check_lptr(cur, cbno, 1)); + if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1))) + return -EFSCORRUPTED; #endif error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); @@ -729,7 +731,7 @@ xfs_bmap_extents_to_btree( ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno); - if (!abp) { + if (XFS_IS_CORRUPT(mp, !abp)) { error = -EFSCORRUPTED; goto out_unreserve_dquot; } @@ -937,7 +939,10 @@ xfs_bmap_add_attrfork_btree( if (error) goto error0; /* must be at least one entry */ - XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0); + if (XFS_IS_CORRUPT(mp, stat != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if ((error = xfs_btree_new_iroot(cur, flags, &stat))) goto error0; if (stat == 0) { @@ -1084,7 +1089,7 @@ xfs_bmap_add_attrfork( goto trans_cancel; if (XFS_IFORK_Q(ip)) goto trans_cancel; - if (ip->i_d.di_anextents != 0) { + if (XFS_IS_CORRUPT(mp, ip->i_d.di_anextents != 0)) { error = -EFSCORRUPTED; goto trans_cancel; } @@ -1155,6 +1160,65 @@ trans_cancel: * Internal and external extent tree search functions. */ +struct xfs_iread_state { + struct xfs_iext_cursor icur; + xfs_extnum_t loaded; +}; + +/* Stuff every bmbt record from this block into the incore extent map. */ +static int +xfs_iread_bmbt_block( + struct xfs_btree_cur *cur, + int level, + void *priv) +{ + struct xfs_iread_state *ir = priv; + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_btree_block *block; + struct xfs_buf *bp; + struct xfs_bmbt_rec *frp; + xfs_extnum_t num_recs; + xfs_extnum_t j; + int whichfork = cur->bc_private.b.whichfork; + + block = xfs_btree_get_block(cur, level, &bp); + + /* Abort if we find more records than nextents. */ + num_recs = xfs_btree_get_numrecs(block); + if (unlikely(ir->loaded + num_recs > + XFS_IFORK_NEXTENTS(ip, whichfork))) { + xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).", + (unsigned long long)ip->i_ino); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block, + sizeof(*block), __this_address); + return -EFSCORRUPTED; + } + + /* Copy records into the incore cache. */ + frp = XFS_BMBT_REC_ADDR(mp, block, 1); + for (j = 0; j < num_recs; j++, frp++, ir->loaded++) { + struct xfs_bmbt_irec new; + xfs_failaddr_t fa; + + xfs_bmbt_disk_get_all(frp, &new); + fa = xfs_bmap_validate_extent(ip, whichfork, &new); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iread_extents(2)", frp, + sizeof(*frp), fa); + return -EFSCORRUPTED; + } + xfs_iext_insert(ip, &ir->icur, &new, + xfs_bmap_fork_to_state(whichfork)); + trace_xfs_read_extent(ip, &ir->icur, + xfs_bmap_fork_to_state(whichfork), _THIS_IP_); + xfs_iext_next(XFS_IFORK_PTR(ip, whichfork), &ir->icur); + } + + return 0; +} + /* * Read in extents from a btree-format inode. */ @@ -1164,134 +1228,39 @@ xfs_iread_extents( struct xfs_inode *ip, int whichfork) { - struct xfs_mount *mp = ip->i_mount; - int state = xfs_bmap_fork_to_state(whichfork); + struct xfs_iread_state ir; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - xfs_extnum_t nextents = XFS_IFORK_NEXTENTS(ip, whichfork); - struct xfs_btree_block *block = ifp->if_broot; - struct xfs_iext_cursor icur; - struct xfs_bmbt_irec new; - xfs_fsblock_t bno; - struct xfs_buf *bp; - xfs_extnum_t i, j; - int level; - __be64 *pp; + struct xfs_mount *mp = ip->i_mount; + struct xfs_btree_cur *cur; int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; - } - - /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. - */ - level = be16_to_cpu(block->bb_level); - if (unlikely(level == 0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; - } - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - - /* - * Go down the tree until leaf level is reached, following the first - * pointer (leftmost) at each level. - */ - while (level-- > 0) { - error = xfs_btree_read_bufl(mp, tp, bno, &bp, - XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); - if (error) - goto out; - block = XFS_BUF_TO_BLOCK(bp); - if (level == 0) - break; - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - XFS_WANT_CORRUPTED_GOTO(mp, - xfs_verify_fsbno(mp, bno), out_brelse); - xfs_trans_brelse(tp, bp); + if (XFS_IS_CORRUPT(mp, + XFS_IFORK_FORMAT(ip, whichfork) != + XFS_DINODE_FMT_BTREE)) { + error = -EFSCORRUPTED; + goto out; } - /* - * Here with bp and block set to the leftmost leaf node in the tree. - */ - i = 0; - xfs_iext_first(ifp, &icur); - - /* - * Loop over all leaf nodes. Copy information to the extent records. - */ - for (;;) { - xfs_bmbt_rec_t *frp; - xfs_fsblock_t nextbno; - xfs_extnum_t num_recs; - - num_recs = xfs_btree_get_numrecs(block); - if (unlikely(i + num_recs > nextents)) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, (btree extents).", - (unsigned long long) ip->i_ino); - xfs_inode_verifier_error(ip, -EFSCORRUPTED, - __func__, block, sizeof(*block), - __this_address); - error = -EFSCORRUPTED; - goto out_brelse; - } - /* - * Read-ahead the next leaf block, if any. - */ - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - if (nextbno != NULLFSBLOCK) - xfs_btree_reada_bufl(mp, nextbno, 1, - &xfs_bmbt_buf_ops); - /* - * Copy records into the extent records. - */ - frp = XFS_BMBT_REC_ADDR(mp, block, 1); - for (j = 0; j < num_recs; j++, frp++, i++) { - xfs_failaddr_t fa; - - xfs_bmbt_disk_get_all(frp, &new); - fa = xfs_bmap_validate_extent(ip, whichfork, &new); - if (fa) { - error = -EFSCORRUPTED; - xfs_inode_verifier_error(ip, error, - "xfs_iread_extents(2)", - frp, sizeof(*frp), fa); - goto out_brelse; - } - xfs_iext_insert(ip, &icur, &new, state); - trace_xfs_read_extent(ip, &icur, state, _THIS_IP_); - xfs_iext_next(ifp, &icur); - } - xfs_trans_brelse(tp, bp); - bno = nextbno; - /* - * If we've reached the end, stop. - */ - if (bno == NULLFSBLOCK) - break; - error = xfs_btree_read_bufl(mp, tp, bno, &bp, - XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); - if (error) - goto out; - block = XFS_BUF_TO_BLOCK(bp); - } + ir.loaded = 0; + xfs_iext_first(ifp, &ir.icur); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block, + XFS_BTREE_VISIT_RECORDS, &ir); + xfs_btree_del_cursor(cur, error); + if (error) + goto out; - if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) { + if (XFS_IS_CORRUPT(mp, + ir.loaded != XFS_IFORK_NEXTENTS(ip, whichfork))) { error = -EFSCORRUPTED; goto out; } - ASSERT(i == xfs_iext_count(ifp)); + ASSERT(ir.loaded == xfs_iext_count(ifp)); ifp->if_flags |= XFS_IFEXTENTS; return 0; - -out_brelse: - xfs_trans_brelse(tp, bp); out: xfs_iext_destroy(ifp); return error; @@ -1318,8 +1287,7 @@ xfs_bmap_first_unused( xfs_fileoff_t lowest, max; int error; - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || + ASSERT(xfs_ifork_has_extents(ip, whichfork) || XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { @@ -1375,7 +1343,8 @@ xfs_bmap_last_before( case XFS_DINODE_FMT_EXTENTS: break; default: - return -EIO; + ASSERT(0); + return -EFSCORRUPTED; } if (!(ifp->if_flags & XFS_IFEXTENTS)) { @@ -1474,9 +1443,8 @@ xfs_bmap_last_offset( if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) return 0; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return -EIO; + if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ip, whichfork))) + return -EFSCORRUPTED; error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); if (error || is_empty) @@ -1653,15 +1621,24 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_delete(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_decrement(bma->cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(bma->cur, &LEFT); if (error) goto done; @@ -1687,7 +1664,10 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(bma->cur, &LEFT); if (error) goto done; @@ -1717,7 +1697,10 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(bma->cur, &PREV); if (error) goto done; @@ -1742,11 +1725,17 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } break; @@ -1777,7 +1766,10 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(bma->cur, &LEFT); if (error) goto done; @@ -1798,11 +1790,17 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } if (xfs_bmap_needs_btree(bma->ip, whichfork)) { @@ -1843,7 +1841,10 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(bma->cur, &RIGHT); if (error) goto done; @@ -1875,11 +1876,17 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } if (xfs_bmap_needs_btree(bma->ip, whichfork)) { @@ -1955,11 +1962,17 @@ xfs_bmap_add_extent_delay_real( error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_insert(bma->cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } if (xfs_bmap_needs_btree(bma->ip, whichfork)) { @@ -2153,19 +2166,34 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &LEFT); if (error) goto done; @@ -2191,13 +2219,22 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &PREV, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &LEFT); if (error) goto done; @@ -2226,13 +2263,22 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &PREV); if (error) goto done; @@ -2255,7 +2301,10 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &PREV); if (error) goto done; @@ -2285,7 +2334,10 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &PREV); if (error) goto done; @@ -2319,14 +2371,20 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &PREV); if (error) goto done; cur->bc_rec.b = *new; if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } break; @@ -2353,7 +2411,10 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &PREV); if (error) goto done; @@ -2387,17 +2448,26 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &PREV); if (error) goto done; error = xfs_bmbt_lookup_eq(cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } break; @@ -2431,7 +2501,10 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } /* new right extent - oldext */ error = xfs_bmbt_update(cur, &r[1]); if (error) @@ -2440,7 +2513,10 @@ xfs_bmap_add_extent_unwritten_real( cur->bc_rec.b = PREV; if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } /* * Reset the cursor to the position of the new extent * we are about to insert as we can't trust it after @@ -2449,11 +2525,17 @@ xfs_bmap_add_extent_unwritten_real( error = xfs_bmbt_lookup_eq(cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } /* new middle extent - newext */ if ((error = xfs_btree_insert(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } break; @@ -2736,15 +2818,24 @@ xfs_bmap_add_extent_hole_real( error = xfs_bmbt_lookup_eq(cur, &right, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_delete(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &left); if (error) goto done; @@ -2770,7 +2861,10 @@ xfs_bmap_add_extent_hole_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &left); if (error) goto done; @@ -2797,7 +2891,10 @@ xfs_bmap_add_extent_hole_real( error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_bmbt_update(cur, &right); if (error) goto done; @@ -2820,11 +2917,17 @@ xfs_bmap_add_extent_hole_real( error = xfs_bmbt_lookup_eq(cur, new, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_insert(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } break; } @@ -3059,7 +3162,7 @@ xfs_bmap_adjacent( mp = ap->ip->i_mount; nullfb = ap->tp->t_firstblock == NULLFSBLOCK; rt = XFS_IS_REALTIME_INODE(ap->ip) && - xfs_alloc_is_userdata(ap->datatype); + (ap->datatype & XFS_ALLOC_USERDATA); fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock); /* @@ -3412,7 +3515,7 @@ xfs_bmap_btalloc( if (ap->flags & XFS_BMAPI_COWFORK) align = xfs_get_cowextsz_hint(ap->ip); - else if (xfs_alloc_is_userdata(ap->datatype)) + else if (ap->datatype & XFS_ALLOC_USERDATA) align = xfs_get_extsz_hint(ap->ip); if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, @@ -3427,7 +3530,7 @@ xfs_bmap_btalloc( fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock); if (nullfb) { - if (xfs_alloc_is_userdata(ap->datatype) && + if ((ap->datatype & XFS_ALLOC_USERDATA) && xfs_inode_is_filestream(ap->ip)) { ag = xfs_filestream_lookup_ag(ap->ip); ag = (ag != NULLAGNUMBER) ? ag : 0; @@ -3467,7 +3570,7 @@ xfs_bmap_btalloc( * enough for the request. If one isn't found, then adjust * the minimum allocation size to the largest space found. */ - if (xfs_alloc_is_userdata(ap->datatype) && + if ((ap->datatype & XFS_ALLOC_USERDATA) && xfs_inode_is_filestream(ap->ip)) error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); else @@ -3501,13 +3604,11 @@ xfs_bmap_btalloc( args.mod = args.prod - args.mod; } /* - * If we are not low on available data blocks, and the - * underlying logical volume manager is a stripe, and - * the file offset is zero then try to allocate data - * blocks on stripe unit boundary. - * NOTE: ap->aeof is only set if the allocation length - * is >= the stripe unit and the allocation offset is - * at the end of file. + * If we are not low on available data blocks, and the underlying + * logical volume manager is a stripe, and the file offset is zero then + * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof + * is only set if the allocation length is >= the stripe unit and the + * allocation offset is at the end of file. */ if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) { if (!ap->offset) { @@ -3515,9 +3616,11 @@ xfs_bmap_btalloc( atype = args.type; isaligned = 1; /* - * Adjust for alignment + * Adjust minlen to try and preserve alignment if we + * can't guarantee an aligned maxlen extent. */ - if (blen > args.alignment && blen <= args.maxlen) + if (blen > args.alignment && + blen <= args.maxlen + args.alignment) args.minlen = blen - args.alignment; args.minalignslop = 0; } else { @@ -3555,8 +3658,6 @@ xfs_bmap_btalloc( args.wasdel = ap->wasdel; args.resv = XFS_AG_RESV_NONE; args.datatype = ap->datatype; - if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) - args.ip = ap->ip; error = xfs_alloc_vextent(&args); if (error) @@ -3641,20 +3742,6 @@ xfs_bmap_btalloc( return 0; } -/* - * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. - * It figures out where to ask the underlying allocator to put the new extent. - */ -STATIC int -xfs_bmap_alloc( - struct xfs_bmalloca *ap) /* bmap alloc argument struct */ -{ - if (XFS_IS_REALTIME_INODE(ap->ip) && - xfs_alloc_is_userdata(ap->datatype)) - return xfs_bmap_rtalloc(ap); - return xfs_bmap_btalloc(ap); -} - /* Trim extent to fit a logical block range. */ void xfs_trim_extent( @@ -3816,11 +3903,8 @@ xfs_bmapi_read( XFS_BMAPI_COWFORK))); ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -4011,6 +4095,39 @@ out_unreserve_quota: } static int +xfs_bmap_alloc_userdata( + struct xfs_bmalloca *bma) +{ + struct xfs_mount *mp = bma->ip->i_mount; + int whichfork = xfs_bmapi_whichfork(bma->flags); + int error; + + /* + * Set the data type being allocated. For the data fork, the first data + * in the file is treated differently to all other allocations. For the + * attribute fork, we only need to ensure the allocated range is not on + * the busy list. + */ + bma->datatype = XFS_ALLOC_NOBUSY; + if (whichfork == XFS_DATA_FORK) { + bma->datatype |= XFS_ALLOC_USERDATA; + if (bma->offset == 0) + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; + + if (mp->m_dalign && bma->length >= mp->m_dalign) { + error = xfs_bmap_isaeof(bma, whichfork); + if (error) + return error; + } + + if (XFS_IS_REALTIME_INODE(bma->ip)) + return xfs_bmap_rtalloc(bma); + } + + return xfs_bmap_btalloc(bma); +} + +static int xfs_bmapi_allocate( struct xfs_bmalloca *bma) { @@ -4029,7 +4146,8 @@ xfs_bmapi_allocate( if (bma->wasdel) { bma->length = (xfs_extlen_t)bma->got.br_blockcount; bma->offset = bma->got.br_startoff; - xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev); + if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev)) + bma->prev.br_startoff = NULLFILEOFF; } else { bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); if (!bma->eof) @@ -4037,43 +4155,24 @@ xfs_bmapi_allocate( bma->got.br_startoff - bma->offset); } - /* - * Set the data type being allocated. For the data fork, the first data - * in the file is treated differently to all other allocations. For the - * attribute fork, we only need to ensure the allocated range is not on - * the busy list. - */ - if (!(bma->flags & XFS_BMAPI_METADATA)) { - bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK) { - if (bma->offset == 0) - bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; - else - bma->datatype |= XFS_ALLOC_USERDATA; - } - if (bma->flags & XFS_BMAPI_ZERO) - bma->datatype |= XFS_ALLOC_USERDATA_ZERO; - } + if (bma->flags & XFS_BMAPI_CONTIG) + bma->minlen = bma->length; + else + bma->minlen = 1; - bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; + if (bma->flags & XFS_BMAPI_METADATA) + error = xfs_bmap_btalloc(bma); + else + error = xfs_bmap_alloc_userdata(bma); + if (error || bma->blkno == NULLFSBLOCK) + return error; - /* - * Only want to do the alignment at the eof if it is userdata and - * allocation length is larger than a stripe unit. - */ - if (mp->m_dalign && bma->length >= mp->m_dalign && - !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { - error = xfs_bmap_isaeof(bma, whichfork); + if (bma->flags & XFS_BMAPI_ZERO) { + error = xfs_zero_extent(bma->ip, bma->blkno, bma->length); if (error) return error; } - error = xfs_bmap_alloc(bma); - if (error) - return error; - - if (bma->blkno == NULLFSBLOCK) - return 0; if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); /* @@ -4313,11 +4412,8 @@ xfs_bmapi_write( ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -4511,7 +4607,6 @@ xfs_bmapi_convert_delalloc( bma.wasdel = true; bma.offset = bma.got.br_startoff; bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN); - bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); if (whichfork == XFS_COW_FORK) bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; @@ -4584,11 +4679,8 @@ xfs_bmapi_remap( ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) != (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)); - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -5019,7 +5111,10 @@ xfs_bmap_del_extent_real( error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } if (got.br_startoff == del->br_startoff) @@ -5043,7 +5138,10 @@ xfs_bmap_del_extent_real( } if ((error = xfs_btree_delete(cur, &i))) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } break; case BMAP_LEFT_FILLING: /* @@ -5114,7 +5212,10 @@ xfs_bmap_del_extent_real( error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } /* * Update the btree record back * to the original value. @@ -5131,7 +5232,10 @@ xfs_bmap_del_extent_real( error = -ENOSPC; goto done; } - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } } else flags |= xfs_ilog_fext(whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, @@ -5198,7 +5302,7 @@ __xfs_bunmapi( int isrt; /* freeing in rt area */ int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ - struct xfs_mount *mp; /* mount structure */ + struct xfs_mount *mp = ip->i_mount; int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ @@ -5215,14 +5319,8 @@ __xfs_bunmapi( whichfork = xfs_bmapi_whichfork(flags); ASSERT(whichfork != XFS_COW_FORK); ifp = XFS_IFORK_PTR(ip, whichfork); - if (unlikely( - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { - XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, - ip->i_mount); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork))) return -EFSCORRUPTED; - } - mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -5616,18 +5714,21 @@ xfs_bmse_merge( error = xfs_bmbt_lookup_eq(cur, got, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; error = xfs_btree_delete(cur, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; /* lookup and update size of the previous extent */ error = xfs_bmbt_lookup_eq(cur, left, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; error = xfs_bmbt_update(cur, &new); if (error) @@ -5675,7 +5776,8 @@ xfs_bmap_shift_update_extent( error = xfs_bmbt_lookup_eq(cur, &prev, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(mp, i == 1); + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; error = xfs_bmbt_update(cur, got); if (error) @@ -5711,11 +5813,8 @@ xfs_bmap_collapse_extents( int error = 0; int logflags = 0; - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -5739,8 +5838,10 @@ xfs_bmap_collapse_extents( *done = true; goto del_cursor; } - XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), - del_cursor); + if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) { + error = -EFSCORRUPTED; + goto del_cursor; + } new_startoff = got.br_startoff - offset_shift_fsb; if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5829,11 +5930,8 @@ xfs_bmap_insert_extents( int error = 0; int logflags = 0; - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -5866,11 +5964,14 @@ xfs_bmap_insert_extents( goto del_cursor; } } - XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), - del_cursor); + if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) { + error = -EFSCORRUPTED; + goto del_cursor; + } - if (stop_fsb >= got.br_startoff + got.br_blockcount) { - error = -EIO; + if (XFS_IS_CORRUPT(mp, + stop_fsb >= got.br_startoff + got.br_blockcount)) { + error = -EFSCORRUPTED; goto del_cursor; } @@ -5935,12 +6036,8 @@ xfs_bmap_split_extent_at( int logflags = 0; int i = 0; - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmap_split_extent_at", - XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -5974,7 +6071,10 @@ xfs_bmap_split_extent_at( error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto del_cursor; + } } got.br_blockcount = gotblkcnt; @@ -5999,11 +6099,17 @@ xfs_bmap_split_extent_at( error = xfs_bmbt_lookup_eq(cur, &new, &i); if (error) goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto del_cursor; + } error = xfs_btree_insert(cur, &i); if (error) goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto del_cursor; + } } /* diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 71de937f9e64..e2cc98931552 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -105,11 +105,10 @@ xfs_btree_check_lblock( xfs_failaddr_t fa; fa = __xfs_btree_check_lblock(cur, block, level, bp); - if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, - XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { + if (XFS_IS_CORRUPT(mp, fa != NULL) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } return 0; @@ -169,11 +168,10 @@ xfs_btree_check_sblock( xfs_failaddr_t fa; fa = __xfs_btree_check_sblock(cur, block, level, bp); - if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, - XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { + if (XFS_IS_CORRUPT(mp, fa != NULL) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } return 0; @@ -384,7 +382,7 @@ xfs_btree_del_cursor( /* * Free the cursor. */ - kmem_zone_free(xfs_btree_cur_zone, cur); + kmem_cache_free(xfs_btree_cur_zone, cur); } /* @@ -717,25 +715,6 @@ xfs_btree_get_bufs( } /* - * Check for the cursor referring to the last block at the given level. - */ -int /* 1=is last block, 0=not last block */ -xfs_btree_islastblock( - xfs_btree_cur_t *cur, /* btree cursor */ - int level) /* level to check */ -{ - struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ - - block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); - else - return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); -} - -/* * Change the cursor to point to the first record at the given level. * Other levels are unaffected. */ @@ -1820,6 +1799,7 @@ xfs_btree_lookup_get_block( out_bad: *blkp = NULL; + xfs_buf_corruption_error(bp); xfs_trans_brelse(cur->bc_tp, bp); return -EFSCORRUPTED; } @@ -1867,7 +1847,7 @@ xfs_btree_lookup( XFS_BTREE_STATS_INC(cur, lookup); /* No such thing as a zero-level tree. */ - if (cur->bc_nlevels == 0) + if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) return -EFSCORRUPTED; block = NULL; @@ -1987,7 +1967,8 @@ xfs_btree_lookup( error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; *stat = 1; return 0; } @@ -2442,7 +2423,10 @@ xfs_btree_lshift( if (error) goto error0; i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_btree_decrement(tcur, level, &i); if (error) @@ -2609,7 +2593,10 @@ xfs_btree_rshift( if (error) goto error0; i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_btree_increment(tcur, level, &i); if (error) @@ -3463,7 +3450,10 @@ xfs_btree_insert( goto error0; } - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } level++; /* @@ -3867,15 +3857,24 @@ xfs_btree_delrec( * Actually any entry but the first would suffice. */ i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_btree_increment(tcur, level, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* Grab a pointer to the block. */ right = xfs_btree_get_block(tcur, level, &rbp); @@ -3919,12 +3918,18 @@ xfs_btree_delrec( rrecs = xfs_btree_get_numrecs(right); if (!xfs_btree_ptr_is_null(cur, &lptr)) { i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_btree_decrement(tcur, level, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } } } @@ -3938,13 +3943,19 @@ xfs_btree_delrec( * previous block. */ i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_btree_decrement(tcur, level, &i); if (error) goto error0; i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* Grab a pointer to the block. */ left = xfs_btree_get_block(tcur, level, &lbp); @@ -4286,6 +4297,7 @@ int xfs_btree_visit_blocks( struct xfs_btree_cur *cur, xfs_btree_visit_blocks_fn fn, + unsigned int flags, void *data) { union xfs_btree_ptr lptr; @@ -4311,6 +4323,11 @@ xfs_btree_visit_blocks( /* save for the next iteration of the loop */ xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); + + if (!(flags & XFS_BTREE_VISIT_LEAVES)) + continue; + } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) { + continue; } /* for each buffer in the level */ @@ -4413,7 +4430,7 @@ xfs_btree_change_owner( bbcoi.buffer_list = buffer_list; return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, - &bbcoi); + XFS_BTREE_VISIT_ALL, &bbcoi); } /* Verify the v5 fields of a long-format btree block. */ @@ -4865,7 +4882,7 @@ xfs_btree_count_blocks( { *blocks = 0; return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, - blocks); + XFS_BTREE_VISIT_ALL, blocks); } /* Compare two btree pointers. */ diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index ced1e65d1483..fb9b2121c628 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -183,6 +183,9 @@ union xfs_btree_cur_private { unsigned long nr_ops; /* # record updates */ int shape_changes; /* # of extent splits */ } refc; + struct { + bool active; /* allocation cursor state */ + } abt; }; /* @@ -315,14 +318,6 @@ xfs_btree_get_bufs( xfs_agblock_t agbno); /* allocation group block number */ /* - * Check for the cursor referring to the last block at the given level. - */ -int /* 1=is last block, 0=not last block */ -xfs_btree_islastblock( - xfs_btree_cur_t *cur, /* btree cursor */ - int level); /* level to check */ - -/* * Compute first and last byte offsets for the fields given. * Interprets the offsets table, which contains struct field offsets. */ @@ -482,8 +477,15 @@ int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn, typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, void *data); +/* Visit record blocks. */ +#define XFS_BTREE_VISIT_RECORDS (1 << 0) +/* Visit leaf blocks. */ +#define XFS_BTREE_VISIT_LEAVES (1 << 1) +/* Visit all blocks. */ +#define XFS_BTREE_VISIT_ALL (XFS_BTREE_VISIT_RECORDS | \ + XFS_BTREE_VISIT_LEAVES) int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, - xfs_btree_visit_blocks_fn fn, void *data); + xfs_btree_visit_blocks_fn fn, unsigned int flags, void *data); int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks); @@ -514,4 +516,21 @@ int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, union xfs_btree_irec *high, bool *exists); bool xfs_btree_has_more_records(struct xfs_btree_cur *cur); +/* Does this cursor point to the last block in the given level? */ +static inline bool +xfs_btree_islastblock( + xfs_btree_cur_t *cur, + int level) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + + block = xfs_btree_get_block(cur, level, &bp); + ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); + return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); +} + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 4fd1223c1bd5..8c3eafe280ed 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -12,9 +12,9 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" +#include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_bmap.h" #include "xfs_attr_leaf.h" @@ -107,7 +107,66 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ - kmem_zone_free(xfs_da_state_zone, state); + kmem_cache_free(xfs_da_state_zone, state); +} + +static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) +{ + if (whichfork == XFS_DATA_FORK) + return mp->m_dir_geo->fsbcount; + return mp->m_attr_geo->fsbcount; +} + +void +xfs_da3_node_hdr_from_disk( + struct xfs_mount *mp, + struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_intnode *from3 = (struct xfs_da3_intnode *)from; + + to->forw = be32_to_cpu(from3->hdr.info.hdr.forw); + to->back = be32_to_cpu(from3->hdr.info.hdr.back); + to->magic = be16_to_cpu(from3->hdr.info.hdr.magic); + to->count = be16_to_cpu(from3->hdr.__count); + to->level = be16_to_cpu(from3->hdr.__level); + to->btree = from3->__btree; + ASSERT(to->magic == XFS_DA3_NODE_MAGIC); + } else { + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.__count); + to->level = be16_to_cpu(from->hdr.__level); + to->btree = from->__btree; + ASSERT(to->magic == XFS_DA_NODE_MAGIC); + } +} + +void +xfs_da3_node_hdr_to_disk( + struct xfs_mount *mp, + struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_da3_intnode *to3 = (struct xfs_da3_intnode *)to; + + ASSERT(from->magic == XFS_DA3_NODE_MAGIC); + to3->hdr.info.hdr.forw = cpu_to_be32(from->forw); + to3->hdr.info.hdr.back = cpu_to_be32(from->back); + to3->hdr.info.hdr.magic = cpu_to_be16(from->magic); + to3->hdr.__count = cpu_to_be16(from->count); + to3->hdr.__level = cpu_to_be16(from->level); + } else { + ASSERT(from->magic == XFS_DA_NODE_MAGIC); + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.__count = cpu_to_be16(from->count); + to->hdr.__level = cpu_to_be16(from->level); + } } /* @@ -145,12 +204,9 @@ xfs_da3_node_verify( struct xfs_mount *mp = bp->b_mount; struct xfs_da_intnode *hdr = bp->b_addr; struct xfs_da3_icnode_hdr ichdr; - const struct xfs_dir_ops *ops; xfs_failaddr_t fa; - ops = xfs_dir_get_ops(mp, NULL); - - ops->node_hdr_from_disk(&ichdr, hdr); + xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr); fa = xfs_da3_blkinfo_verify(bp, bp->b_addr); if (fa) @@ -275,46 +331,76 @@ const struct xfs_buf_ops xfs_da3_node_buf_ops = { .verify_struct = xfs_da3_node_verify_struct, }; +static int +xfs_da3_node_set_type( + struct xfs_trans *tp, + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA_NODE_MAGIC: + case XFS_DA3_NODE_MAGIC: + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); + return 0; + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF); + return 0; + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); + return 0; + default: + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp, + info, sizeof(*info)); + xfs_trans_brelse(tp, bp); + return -EFSCORRUPTED; + } +} + int xfs_da3_node_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, + struct xfs_buf **bpp, + int whichfork) +{ + int error; + + error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork, + &xfs_da3_node_buf_ops); + if (error || !*bpp || !tp) + return error; + return xfs_da3_node_set_type(tp, *bpp); +} + +int +xfs_da3_node_read_mapped( + struct xfs_trans *tp, + struct xfs_inode *dp, xfs_daddr_t mappedbno, struct xfs_buf **bpp, - int which_fork) + int whichfork) { - int err; + struct xfs_mount *mp = dp->i_mount; + int error; - err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, - which_fork, &xfs_da3_node_buf_ops); - if (!err && tp && *bpp) { - struct xfs_da_blkinfo *info = (*bpp)->b_addr; - int type; + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno, + XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0, + bpp, &xfs_da3_node_buf_ops); + if (error || !*bpp) + return error; - switch (be16_to_cpu(info->magic)) { - case XFS_DA_NODE_MAGIC: - case XFS_DA3_NODE_MAGIC: - type = XFS_BLFT_DA_NODE_BUF; - break; - case XFS_ATTR_LEAF_MAGIC: - case XFS_ATTR3_LEAF_MAGIC: - type = XFS_BLFT_ATTR_LEAF_BUF; - break; - case XFS_DIR2_LEAFN_MAGIC: - case XFS_DIR3_LEAFN_MAGIC: - type = XFS_BLFT_DIR_LEAFN_BUF; - break; - default: - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - tp->t_mountp, info, sizeof(*info)); - xfs_trans_brelse(tp, *bpp); - *bpp = NULL; - return -EFSCORRUPTED; - } - xfs_trans_buf_set_type(tp, *bpp, type); - } - return err; + if (whichfork == XFS_ATTR_FORK) + xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF); + else + xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF); + + if (!tp) + return 0; + return xfs_da3_node_set_type(tp, *bpp); } /*======================================================================== @@ -343,7 +429,7 @@ xfs_da3_node_create( trace_xfs_da_node_create(args); ASSERT(level <= XFS_DA_NODE_MAXDEPTH); - error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); + error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork); if (error) return error; bp->b_ops = &xfs_da3_node_buf_ops; @@ -363,9 +449,9 @@ xfs_da3_node_create( } ichdr.level = level; - dp->d_ops->node_hdr_to_disk(node, &ichdr); + xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr); xfs_trans_log_buf(tp, bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size)); *bpp = bp; return 0; @@ -504,6 +590,7 @@ xfs_da3_split( node = oldblk->bp->b_addr; if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { + xfs_buf_corruption_error(oldblk->bp); error = -EFSCORRUPTED; goto out; } @@ -516,6 +603,7 @@ xfs_da3_split( node = oldblk->bp->b_addr; if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { + xfs_buf_corruption_error(oldblk->bp); error = -EFSCORRUPTED; goto out; } @@ -568,7 +656,7 @@ xfs_da3_root_split( dp = args->dp; tp = args->trans; - error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); + error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork); if (error) return error; node = bp->b_addr; @@ -577,8 +665,8 @@ xfs_da3_root_split( oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { struct xfs_da3_icnode_hdr icnodehdr; - dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot); - btree = dp->d_ops->node_tree_p(oldroot); + xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot); + btree = icnodehdr.btree; size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot); level = icnodehdr.level; @@ -589,15 +677,14 @@ xfs_da3_root_split( xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); } else { struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; leaf = (xfs_dir2_leaf_t *)oldroot; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); - size = (int)((char *)&ents[leafhdr.count] - (char *)leaf); + size = (int)((char *)&leafhdr.ents[leafhdr.count] - + (char *)leaf); level = 0; /* @@ -637,14 +724,14 @@ xfs_da3_root_split( return error; node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); + btree = nodehdr.btree; btree[0].hashval = cpu_to_be32(blk1->hashval); btree[0].before = cpu_to_be32(blk1->blkno); btree[1].hashval = cpu_to_be32(blk2->hashval); btree[1].before = cpu_to_be32(blk2->blkno); nodehdr.count = 2; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); #ifdef DEBUG if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || @@ -686,7 +773,7 @@ xfs_da3_node_split( trace_xfs_da_node_split(state->args); node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); /* * With V2 dirs the extra block is data or freespace. @@ -733,7 +820,7 @@ xfs_da3_node_split( * If we had double-split op below us, then add the extra block too. */ node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); if (oldblk->index <= nodehdr.count) { oldblk->index++; xfs_da3_node_add(state, oldblk, addblk); @@ -788,10 +875,10 @@ xfs_da3_node_rebalance( node1 = blk1->bp->b_addr; node2 = blk2->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2); + btree1 = nodehdr1.btree; + btree2 = nodehdr2.btree; /* * Figure out how many entries need to move, and in which direction. @@ -804,10 +891,10 @@ xfs_da3_node_rebalance( tmpnode = node1; node1 = node2; node2 = tmpnode; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2); + btree1 = nodehdr1.btree; + btree2 = nodehdr2.btree; swap = 1; } @@ -869,14 +956,15 @@ xfs_da3_node_rebalance( /* * Log header of node 1 and all current bits of node 2. */ - dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); + xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1); xfs_trans_log_buf(tp, blk1->bp, - XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node1, &node1->hdr, + state->args->geo->node_hdr_size)); - dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); + xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2); xfs_trans_log_buf(tp, blk2->bp, XFS_DA_LOGRANGE(node2, &node2->hdr, - dp->d_ops->node_hdr_size + + state->args->geo->node_hdr_size + (sizeof(btree2[0]) * nodehdr2.count))); /* @@ -886,10 +974,10 @@ xfs_da3_node_rebalance( if (swap) { node1 = blk1->bp->b_addr; node2 = blk2->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2); + btree1 = nodehdr1.btree; + btree2 = nodehdr2.btree; } blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); @@ -921,8 +1009,8 @@ xfs_da3_node_add( trace_xfs_da_node_add(state->args); node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); + btree = nodehdr.btree; ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); ASSERT(newblk->blkno != 0); @@ -945,9 +1033,10 @@ xfs_da3_node_add( tmp + sizeof(*btree))); nodehdr.count += 1; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); xfs_trans_log_buf(state->args->trans, oldblk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node, &node->hdr, + state->args->geo->node_hdr_size)); /* * Copy the last hash value from the oldblk to propagate upwards. @@ -1082,7 +1171,6 @@ xfs_da3_root_join( xfs_dablk_t child; struct xfs_buf *bp; struct xfs_da3_icnode_hdr oldroothdr; - struct xfs_da_node_entry *btree; int error; struct xfs_inode *dp = state->args->dp; @@ -1092,7 +1180,7 @@ xfs_da3_root_join( args = state->args; oldroot = root_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); + xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot); ASSERT(oldroothdr.forw == 0); ASSERT(oldroothdr.back == 0); @@ -1106,11 +1194,9 @@ xfs_da3_root_join( * Read in the (only) child block, then copy those bytes into * the root block's buffer and free the original child block. */ - btree = dp->d_ops->node_tree_p(oldroot); - child = be32_to_cpu(btree[0].before); + child = be32_to_cpu(oldroothdr.btree[0].before); ASSERT(child != 0); - error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, - args->whichfork); + error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork); if (error) return error; xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); @@ -1172,7 +1258,7 @@ xfs_da3_node_toosmall( blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->b_addr; node = (xfs_da_intnode_t *)info; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); if (nodehdr.count > (state->args->geo->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return 0; /* blk over 50%, don't try to join */ @@ -1224,13 +1310,13 @@ xfs_da3_node_toosmall( blkno = nodehdr.back; if (blkno == 0) continue; - error = xfs_da3_node_read(state->args->trans, dp, - blkno, -1, &bp, state->args->whichfork); + error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp, + state->args->whichfork); if (error) return error; node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&thdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node); xfs_trans_brelse(state->args->trans, bp); if (count - thdr.count >= 0) @@ -1272,18 +1358,14 @@ xfs_da3_node_lasthash( struct xfs_buf *bp, int *count) { - struct xfs_da_intnode *node; - struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; - node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr); if (count) *count = nodehdr.count; if (!nodehdr.count) return 0; - btree = dp->d_ops->node_tree_p(node); - return be32_to_cpu(btree[nodehdr.count - 1].hashval); + return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval); } /* @@ -1328,8 +1410,8 @@ xfs_da3_fixhashpath( struct xfs_da3_icnode_hdr nodehdr; node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); + btree = nodehdr.btree; if (be32_to_cpu(btree[blk->index].hashval) == lasthash) break; blk->hashval = lasthash; @@ -1360,7 +1442,7 @@ xfs_da3_node_remove( trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); ASSERT(drop_blk->index < nodehdr.count); ASSERT(drop_blk->index >= 0); @@ -1368,7 +1450,7 @@ xfs_da3_node_remove( * Copy over the offending entry, or just zero it out. */ index = drop_blk->index; - btree = dp->d_ops->node_tree_p(node); + btree = nodehdr.btree; if (index < nodehdr.count - 1) { tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); @@ -1381,9 +1463,9 @@ xfs_da3_node_remove( xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size)); /* * Copy the last hash value from the block to propagate upwards. @@ -1416,10 +1498,10 @@ xfs_da3_node_unbalance( drop_node = drop_blk->bp->b_addr; save_node = save_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); - dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); - drop_btree = dp->d_ops->node_tree_p(drop_node); - save_btree = dp->d_ops->node_tree_p(save_node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node); + drop_btree = drop_hdr.btree; + save_btree = save_hdr.btree; tp = state->args->trans; /* @@ -1453,10 +1535,10 @@ xfs_da3_node_unbalance( memcpy(&save_btree[sindex], &drop_btree[0], tmp); save_hdr.count += drop_hdr.count; - dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); + xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr); xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, - dp->d_ops->node_hdr_size)); + state->args->geo->node_hdr_size)); /* * Save the last hashval in the remaining block for upward propagation. @@ -1517,7 +1599,7 @@ xfs_da3_node_lookup_int( */ blk->blkno = blkno; error = xfs_da3_node_read(args->trans, args->dp, blkno, - -1, &blk->bp, args->whichfork); + &blk->bp, args->whichfork); if (error) { blk->blkno = 0; state->path.active--; @@ -1541,8 +1623,10 @@ xfs_da3_node_lookup_int( break; } - if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) + if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { + xfs_buf_corruption_error(blk->bp); return -EFSCORRUPTED; + } blk->magic = XFS_DA_NODE_MAGIC; @@ -1550,19 +1634,22 @@ xfs_da3_node_lookup_int( * Search an intermediate node for a match. */ node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); + btree = nodehdr.btree; /* Tree taller than we can handle; bail out! */ - if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) + if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { + xfs_buf_corruption_error(blk->bp); return -EFSCORRUPTED; + } /* Check the level from the root. */ if (blkno == args->geo->leafblk) expected_level = nodehdr.level - 1; - else if (expected_level != nodehdr.level) + else if (expected_level != nodehdr.level) { + xfs_buf_corruption_error(blk->bp); return -EFSCORRUPTED; - else + } else expected_level--; max = nodehdr.count; @@ -1612,11 +1699,11 @@ xfs_da3_node_lookup_int( } /* We can't point back to the root. */ - if (blkno == args->geo->leafblk) + if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) return -EFSCORRUPTED; } - if (expected_level != 0) + if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) return -EFSCORRUPTED; /* @@ -1678,10 +1765,10 @@ xfs_da3_node_order( node1 = node1_bp->b_addr; node2 = node2_bp->b_addr; - dp->d_ops->node_hdr_from_disk(&node1hdr, node1); - dp->d_ops->node_hdr_from_disk(&node2hdr, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1); + xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2); + btree1 = node1hdr.btree; + btree2 = node2hdr.btree; if (node1hdr.count > 0 && node2hdr.count > 0 && ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || @@ -1746,7 +1833,7 @@ xfs_da3_blk_link( if (old_info->back) { error = xfs_da3_node_read(args->trans, dp, be32_to_cpu(old_info->back), - -1, &bp, args->whichfork); + &bp, args->whichfork); if (error) return error; ASSERT(bp != NULL); @@ -1767,7 +1854,7 @@ xfs_da3_blk_link( if (old_info->forw) { error = xfs_da3_node_read(args->trans, dp, be32_to_cpu(old_info->forw), - -1, &bp, args->whichfork); + &bp, args->whichfork); if (error) return error; ASSERT(bp != NULL); @@ -1826,7 +1913,7 @@ xfs_da3_blk_unlink( if (drop_info->back) { error = xfs_da3_node_read(args->trans, args->dp, be32_to_cpu(drop_info->back), - -1, &bp, args->whichfork); + &bp, args->whichfork); if (error) return error; ASSERT(bp != NULL); @@ -1843,7 +1930,7 @@ xfs_da3_blk_unlink( if (drop_info->forw) { error = xfs_da3_node_read(args->trans, args->dp, be32_to_cpu(drop_info->forw), - -1, &bp, args->whichfork); + &bp, args->whichfork); if (error) return error; ASSERT(bp != NULL); @@ -1878,7 +1965,6 @@ xfs_da3_path_shift( { struct xfs_da_state_blk *blk; struct xfs_da_blkinfo *info; - struct xfs_da_intnode *node; struct xfs_da_args *args; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; @@ -1901,17 +1987,16 @@ xfs_da3_path_shift( ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); level = (path->active-1) - 1; /* skip bottom layer in path */ for (blk = &path->blk[level]; level >= 0; blk--, level--) { - node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, + blk->bp->b_addr); if (forward && (blk->index < nodehdr.count - 1)) { blk->index++; - blkno = be32_to_cpu(btree[blk->index].before); + blkno = be32_to_cpu(nodehdr.btree[blk->index].before); break; } else if (!forward && (blk->index > 0)) { blk->index--; - blkno = be32_to_cpu(btree[blk->index].before); + blkno = be32_to_cpu(nodehdr.btree[blk->index].before); break; } } @@ -1929,7 +2014,7 @@ xfs_da3_path_shift( /* * Read the next child block into a local buffer. */ - error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp, + error = xfs_da3_node_read(args->trans, dp, blkno, &bp, args->whichfork); if (error) return error; @@ -1962,9 +2047,9 @@ xfs_da3_path_shift( case XFS_DA_NODE_MAGIC: case XFS_DA3_NODE_MAGIC: blk->magic = XFS_DA_NODE_MAGIC; - node = (xfs_da_intnode_t *)info; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, + bp->b_addr); + btree = nodehdr.btree; blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); if (forward) blk->index = 0; @@ -2044,18 +2129,6 @@ xfs_da_compname( XFS_CMP_EXACT : XFS_CMP_DIFFERENT; } -static xfs_dahash_t -xfs_default_hashname( - struct xfs_name *name) -{ - return xfs_da_hashname(name->name, name->len); -} - -const struct xfs_nameops xfs_default_nameops = { - .hashname = xfs_default_hashname, - .compname = xfs_da_compname -}; - int xfs_da_grow_inode_int( struct xfs_da_args *args, @@ -2213,16 +2286,13 @@ xfs_da3_swap_lastblock( error = xfs_bmap_last_before(tp, dp, &lastoff, w); if (error) return error; - if (unlikely(lastoff == 0)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, - mp); + if (XFS_IS_CORRUPT(mp, lastoff == 0)) return -EFSCORRUPTED; - } /* * Read the last block in the btree space. */ last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; - error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); + error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w); if (error) return error; /* @@ -2240,16 +2310,17 @@ xfs_da3_swap_lastblock( struct xfs_dir2_leaf_entry *ents; dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); - ents = dp->d_ops->leaf_ents_p(dead_leaf2); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, + dead_leaf2); + ents = leafhdr.ents; dead_level = 0; dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); } else { struct xfs_da3_icnode_hdr deadhdr; dead_node = (xfs_da_intnode_t *)dead_info; - dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); - btree = dp->d_ops->node_tree_p(dead_node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node); + btree = deadhdr.btree; dead_level = deadhdr.level; dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); } @@ -2258,15 +2329,13 @@ xfs_da3_swap_lastblock( * If the moved block has a left sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->back))) { - error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); + error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w); if (error) goto done; sib_info = sib_buf->b_addr; - if (unlikely( - be32_to_cpu(sib_info->forw) != last_blkno || - sib_info->magic != dead_info->magic)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", - XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, + be32_to_cpu(sib_info->forw) != last_blkno || + sib_info->magic != dead_info->magic)) { error = -EFSCORRUPTED; goto done; } @@ -2280,15 +2349,13 @@ xfs_da3_swap_lastblock( * If the moved block has a right sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->forw))) { - error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); + error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w); if (error) goto done; sib_info = sib_buf->b_addr; - if (unlikely( - be32_to_cpu(sib_info->back) != last_blkno || - sib_info->magic != dead_info->magic)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", - XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, + be32_to_cpu(sib_info->back) != last_blkno || + sib_info->magic != dead_info->magic)) { error = -EFSCORRUPTED; goto done; } @@ -2304,27 +2371,24 @@ xfs_da3_swap_lastblock( * Walk down the tree looking for the parent of the moved block. */ for (;;) { - error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); + error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w); if (error) goto done; par_node = par_buf->b_addr; - dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); - if (level >= 0 && level != par_hdr.level + 1) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", - XFS_ERRLEVEL_LOW, mp); + xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node); + if (XFS_IS_CORRUPT(mp, + level >= 0 && level != par_hdr.level + 1)) { error = -EFSCORRUPTED; goto done; } level = par_hdr.level; - btree = dp->d_ops->node_tree_p(par_node); + btree = par_hdr.btree; for (entno = 0; entno < par_hdr.count && be32_to_cpu(btree[entno].hashval) < dead_hash; entno++) continue; - if (entno == par_hdr.count) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", - XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) { error = -EFSCORRUPTED; goto done; } @@ -2349,24 +2413,20 @@ xfs_da3_swap_lastblock( par_blkno = par_hdr.forw; xfs_trans_brelse(tp, par_buf); par_buf = NULL; - if (unlikely(par_blkno == 0)) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", - XFS_ERRLEVEL_LOW, mp); + if (XFS_IS_CORRUPT(mp, par_blkno == 0)) { error = -EFSCORRUPTED; goto done; } - error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); + error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w); if (error) goto done; par_node = par_buf->b_addr; - dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); - if (par_hdr.level != level) { - XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", - XFS_ERRLEVEL_LOW, mp); + xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node); + if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) { error = -EFSCORRUPTED; goto done; } - btree = dp->d_ops->node_tree_p(par_node); + btree = par_hdr.btree; entno = 0; } /* @@ -2429,159 +2489,84 @@ xfs_da_shrink_inode( return error; } -/* - * See if the mapping(s) for this btree block are valid, i.e. - * don't contain holes, are logically contiguous, and cover the whole range. - */ -STATIC int -xfs_da_map_covers_blocks( - int nmap, - xfs_bmbt_irec_t *mapp, - xfs_dablk_t bno, - int count) -{ - int i; - xfs_fileoff_t off; - - for (i = 0, off = bno; i < nmap; i++) { - if (mapp[i].br_startblock == HOLESTARTBLOCK || - mapp[i].br_startblock == DELAYSTARTBLOCK) { - return 0; - } - if (off != mapp[i].br_startoff) { - return 0; - } - off += mapp[i].br_blockcount; - } - return off == bno + count; -} - -/* - * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. - * - * For the single map case, it is assumed that the caller has provided a pointer - * to a valid xfs_buf_map. For the multiple map case, this function will - * allocate the xfs_buf_map to hold all the maps and replace the caller's single - * map pointer with the allocated map. - */ static int -xfs_buf_map_from_irec( - struct xfs_mount *mp, +xfs_dabuf_map( + struct xfs_inode *dp, + xfs_dablk_t bno, + unsigned int flags, + int whichfork, struct xfs_buf_map **mapp, - int *nmaps, - struct xfs_bmbt_irec *irecs, - int nirecs) + int *nmaps) { - struct xfs_buf_map *map; - int i; - - ASSERT(*nmaps == 1); - ASSERT(nirecs >= 1); + struct xfs_mount *mp = dp->i_mount; + int nfsb = xfs_dabuf_nfsb(mp, whichfork); + struct xfs_bmbt_irec irec, *irecs = &irec; + struct xfs_buf_map *map = *mapp; + xfs_fileoff_t off = bno; + int error = 0, nirecs, i; + + if (nfsb > 1) + irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS); + + nirecs = nfsb; + error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs, + xfs_bmapi_aflag(whichfork)); + if (error) + goto out_free_irecs; + /* + * Use the caller provided map for the single map case, else allocate a + * larger one that needs to be free by the caller. + */ if (nirecs > 1) { - map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), - KM_NOFS); + map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS); if (!map) - return -ENOMEM; + goto out_free_irecs; *mapp = map; } - *nmaps = nirecs; - map = *mapp; - for (i = 0; i < *nmaps; i++) { - ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && - irecs[i].br_startblock != HOLESTARTBLOCK); + for (i = 0; i < nirecs; i++) { + if (irecs[i].br_startblock == HOLESTARTBLOCK || + irecs[i].br_startblock == DELAYSTARTBLOCK) + goto invalid_mapping; + if (off != irecs[i].br_startoff) + goto invalid_mapping; + map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); + off += irecs[i].br_blockcount; } - return 0; -} - -/* - * Map the block we are given ready for reading. There are three possible return - * values: - * -1 - will be returned if we land in a hole and mappedbno == -2 so the - * caller knows not to execute a subsequent read. - * 0 - if we mapped the block successfully - * >0 - positive error number if there was an error. - */ -static int -xfs_dabuf_map( - struct xfs_inode *dp, - xfs_dablk_t bno, - xfs_daddr_t mappedbno, - int whichfork, - struct xfs_buf_map **map, - int *nmaps) -{ - struct xfs_mount *mp = dp->i_mount; - int nfsb; - int error = 0; - struct xfs_bmbt_irec irec; - struct xfs_bmbt_irec *irecs = &irec; - int nirecs; - ASSERT(map && *map); - ASSERT(*nmaps == 1); + if (off != bno + nfsb) + goto invalid_mapping; - if (whichfork == XFS_DATA_FORK) - nfsb = mp->m_dir_geo->fsbcount; - else - nfsb = mp->m_attr_geo->fsbcount; - - /* - * Caller doesn't have a mapping. -2 means don't complain - * if we land in a hole. - */ - if (mappedbno == -1 || mappedbno == -2) { - /* - * Optimize the one-block case. - */ - if (nfsb != 1) - irecs = kmem_zalloc(sizeof(irec) * nfsb, - KM_NOFS); + *nmaps = nirecs; +out_free_irecs: + if (irecs != &irec) + kmem_free(irecs); + return error; - nirecs = nfsb; - error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, - &nirecs, xfs_bmapi_aflag(whichfork)); - if (error) - goto out; - } else { - irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); - irecs->br_startoff = (xfs_fileoff_t)bno; - irecs->br_blockcount = nfsb; - irecs->br_state = 0; - nirecs = 1; - } +invalid_mapping: + /* Caller ok with no mapping. */ + if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) { + error = -EFSCORRUPTED; + if (xfs_error_level >= XFS_ERRLEVEL_LOW) { + xfs_alert(mp, "%s: bno %u inode %llu", + __func__, bno, dp->i_ino); - if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { - error = mappedbno == -2 ? -1 : -EFSCORRUPTED; - if (unlikely(error == -EFSCORRUPTED)) { - if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - int i; - xfs_alert(mp, "%s: bno %lld dir: inode %lld", - __func__, (long long)bno, - (long long)dp->i_ino); - for (i = 0; i < *nmaps; i++) { - xfs_alert(mp, + for (i = 0; i < nirecs; i++) { + xfs_alert(mp, "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", - i, - (long long)irecs[i].br_startoff, - (long long)irecs[i].br_startblock, - (long long)irecs[i].br_blockcount, - irecs[i].br_state); - } + i, irecs[i].br_startoff, + irecs[i].br_startblock, + irecs[i].br_blockcount, + irecs[i].br_state); } - XFS_ERROR_REPORT("xfs_da_do_buf(1)", - XFS_ERRLEVEL_LOW, mp); } - goto out; + } else { + *nmaps = 0; } - error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); -out: - if (irecs != &irec) - kmem_free(irecs); - return error; + goto out_free_irecs; } /* @@ -2589,37 +2574,28 @@ out: */ int xfs_da_get_buf( - struct xfs_trans *trans, + struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mappedbno, struct xfs_buf **bpp, int whichfork) { + struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp; - struct xfs_buf_map map; - struct xfs_buf_map *mapp; - int nmap; + struct xfs_buf_map map, *mapp = ↦ + int nmap = 1; int error; *bpp = NULL; - mapp = ↦ - nmap = 1; - error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, - &mapp, &nmap); - if (error) { - /* mapping a hole is not an error, but we don't continue */ - if (error == -1) - error = 0; + error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap); + if (error || nmap == 0) goto out_free; - } - bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, - mapp, nmap, 0); + bp = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0); error = bp ? bp->b_error : -EIO; if (error) { if (bp) - xfs_trans_brelse(trans, bp); + xfs_trans_brelse(tp, bp); goto out_free; } @@ -2637,35 +2613,27 @@ out_free: */ int xfs_da_read_buf( - struct xfs_trans *trans, + struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mappedbno, + unsigned int flags, struct xfs_buf **bpp, int whichfork, const struct xfs_buf_ops *ops) { + struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp; - struct xfs_buf_map map; - struct xfs_buf_map *mapp; - int nmap; + struct xfs_buf_map map, *mapp = ↦ + int nmap = 1; int error; *bpp = NULL; - mapp = ↦ - nmap = 1; - error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, - &mapp, &nmap); - if (error) { - /* mapping a hole is not an error, but we don't continue */ - if (error == -1) - error = 0; + error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap); + if (error || !nmap) goto out_free; - } - error = xfs_trans_read_buf_map(dp->i_mount, trans, - dp->i_mount->m_ddev_targp, - mapp, nmap, 0, &bp, ops); + error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0, + &bp, ops); if (error) goto out_free; @@ -2688,7 +2656,7 @@ int xfs_da_reada_buf( struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mappedbno, + unsigned int flags, int whichfork, const struct xfs_buf_ops *ops) { @@ -2699,16 +2667,10 @@ xfs_da_reada_buf( mapp = ↦ nmap = 1; - error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, - &mapp, &nmap); - if (error) { - /* mapping a hole is not an error, but we don't continue */ - if (error == -1) - error = 0; + error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap); + if (error || !nmap) goto out_free; - } - mappedbno = mapp[0].bm_bn; xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); out_free: diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index ae0bbd20d9ca..e16610d1c14f 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -10,7 +10,6 @@ struct xfs_inode; struct xfs_trans; struct zone; -struct xfs_dir_ops; /* * Directory/attribute geometry information. There will be one of these for each @@ -18,15 +17,23 @@ struct xfs_dir_ops; * structures will be attached to the xfs_mount. */ struct xfs_da_geometry { - int blksize; /* da block size in bytes */ - int fsbcount; /* da block size in filesystem blocks */ + unsigned int blksize; /* da block size in bytes */ + unsigned int fsbcount; /* da block size in filesystem blocks */ uint8_t fsblog; /* log2 of _filesystem_ block size */ uint8_t blklog; /* log2 of da block size */ - uint node_ents; /* # of entries in a danode */ - int magicpct; /* 37% of block size in bytes */ + unsigned int node_hdr_size; /* danode header size in bytes */ + unsigned int node_ents; /* # of entries in a danode */ + unsigned int magicpct; /* 37% of block size in bytes */ xfs_dablk_t datablk; /* blockno of dir data v2 */ + unsigned int leaf_hdr_size; /* dir2 leaf header size */ + unsigned int leaf_max_ents; /* # of entries in dir2 leaf */ xfs_dablk_t leafblk; /* blockno of leaf data v2 */ + unsigned int free_hdr_size; /* dir2 free header size */ + unsigned int free_max_bests; /* # of bests entries in dir2 free */ xfs_dablk_t freeblk; /* blockno of free data v2 */ + + xfs_dir2_data_aoff_t data_first_offset; + size_t data_entry_offset; }; /*======================================================================== @@ -125,6 +132,25 @@ typedef struct xfs_da_state { } xfs_da_state_t; /* + * In-core version of the node header to abstract the differences in the v2 and + * v3 disk format of the headers. Callers need to convert to/from disk format as + * appropriate. + */ +struct xfs_da3_icnode_hdr { + uint32_t forw; + uint32_t back; + uint16_t magic; + uint16_t count; + uint16_t level; + + /* + * Pointer to the on-disk format entries, which are behind the + * variable size (v4 vs v5) header in the on-disk block. + */ + struct xfs_da_node_entry *btree; +}; + +/* * Utility macros to aid in logging changed structure fields. */ #define XFS_DA_LOGOFF(BASE, ADDR) ((char *)(ADDR) - (char *)(BASE)) @@ -132,16 +158,6 @@ typedef struct xfs_da_state { (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \ (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1) -/* - * Name ops for directory and/or attr name operations - */ -struct xfs_nameops { - xfs_dahash_t (*hashname)(struct xfs_name *); - enum xfs_dacmp (*compname)(struct xfs_da_args *, - const unsigned char *, int); -}; - - /*======================================================================== * Function prototypes. *========================================================================*/ @@ -172,25 +188,28 @@ int xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, xfs_da_state_blk_t *new_blk); int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mappedbno, - struct xfs_buf **bpp, int which_fork); + xfs_dablk_t bno, struct xfs_buf **bpp, int whichfork); +int xfs_da3_node_read_mapped(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_daddr_t mappedbno, struct xfs_buf **bpp, + int whichfork); /* * Utility routines. */ + +#define XFS_DABUF_MAP_HOLE_OK (1 << 0) + int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, int count); int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mappedbno, - struct xfs_buf **bp, int whichfork); + xfs_dablk_t bno, struct xfs_buf **bp, int whichfork); int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mappedbno, - struct xfs_buf **bpp, int whichfork, - const struct xfs_buf_ops *ops); + xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp, + int whichfork, const struct xfs_buf_ops *ops); int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mapped_bno, int whichfork, - const struct xfs_buf_ops *ops); + unsigned int flags, int whichfork, + const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, struct xfs_buf *dead_buf); @@ -202,7 +221,11 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, xfs_da_state_t *xfs_da_state_alloc(void); void xfs_da_state_free(xfs_da_state_t *state); +void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, + struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from); +void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp, + struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from); + extern struct kmem_zone *xfs_da_state_zone; -extern const struct xfs_nameops xfs_default_nameops; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c deleted file mode 100644 index b1ae572496b6..000000000000 --- a/fs/xfs/libxfs/xfs_da_format.c +++ /dev/null @@ -1,888 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_dir2.h" - -/* - * Shortform directory ops - */ -static int -xfs_dir2_sf_entsize( - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ - - count += len; /* name */ - count += hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */ - return count; -} - -static int -xfs_dir3_sf_entsize( - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t); -} - -static struct xfs_dir2_sf_entry * -xfs_dir2_sf_nextentry( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); -} - -static struct xfs_dir2_sf_entry * -xfs_dir3_sf_nextentry( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); -} - - -/* - * For filetype enabled shortform directories, the file type field is stored at - * the end of the name. Because it's only a single byte, endian conversion is - * not necessary. For non-filetype enable directories, the type is always - * unknown and we never store the value. - */ -static uint8_t -xfs_dir2_sfe_get_ftype( - struct xfs_dir2_sf_entry *sfep) -{ - return XFS_DIR3_FT_UNKNOWN; -} - -static void -xfs_dir2_sfe_put_ftype( - struct xfs_dir2_sf_entry *sfep, - uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); -} - -static uint8_t -xfs_dir3_sfe_get_ftype( - struct xfs_dir2_sf_entry *sfep) -{ - uint8_t ftype; - - ftype = sfep->name[sfep->namelen]; - if (ftype >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return ftype; -} - -static void -xfs_dir3_sfe_put_ftype( - struct xfs_dir2_sf_entry *sfep, - uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); - - sfep->name[sfep->namelen] = ftype; -} - -/* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide. These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. - */ -static xfs_ino_t -xfs_dir2_sf_get_ino( - struct xfs_dir2_sf_hdr *hdr, - uint8_t *from) -{ - if (hdr->i8count) - return get_unaligned_be64(from) & 0x00ffffffffffffffULL; - else - return get_unaligned_be32(from); -} - -static void -xfs_dir2_sf_put_ino( - struct xfs_dir2_sf_hdr *hdr, - uint8_t *to, - xfs_ino_t ino) -{ - ASSERT((ino & 0xff00000000000000ULL) == 0); - - if (hdr->i8count) - put_unaligned_be64(ino, to); - else - put_unaligned_be32(ino, to); -} - -static xfs_ino_t -xfs_dir2_sf_get_parent_ino( - struct xfs_dir2_sf_hdr *hdr) -{ - return xfs_dir2_sf_get_ino(hdr, hdr->parent); -} - -static void -xfs_dir2_sf_put_parent_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. If the entry stores a filetype value, then it - * sits between the name and the inode number. Hence the inode numbers may only - * be accessed through the helpers below. - */ -static xfs_ino_t -xfs_dir2_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, &sfep->name[sfep->namelen]); -} - -static void -xfs_dir2_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &sfep->name[sfep->namelen], ino); -} - -static xfs_ino_t -xfs_dir3_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, &sfep->name[sfep->namelen + 1]); -} - -static void -xfs_dir3_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &sfep->name[sfep->namelen + 1], ino); -} - - -/* - * Directory data block operations - */ - -/* - * For special situations, the dirent size ends up fixed because we always know - * what the size of the entry is. That's true for the "." and "..", and - * therefore we know that they are a fixed size and hence their offsets are - * constant, as is the first entry. - * - * Hence, this calculation is written as a macro to be able to be calculated at - * compile time and so certain offsets can be calculated directly in the - * structure initaliser via the macro. There are two macros - one for dirents - * with ftype and without so there are no unresolvable conditionals in the - * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power - * of 2 and the compiler doesn't reject it (unlike roundup()). - */ -#define XFS_DIR2_DATA_ENTSIZE(n) \ - round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN) - -#define XFS_DIR3_DATA_ENTSIZE(n) \ - round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)), \ - XFS_DIR2_DATA_ALIGN) - -static int -xfs_dir2_data_entsize( - int n) -{ - return XFS_DIR2_DATA_ENTSIZE(n); -} - -static int -xfs_dir3_data_entsize( - int n) -{ - return XFS_DIR3_DATA_ENTSIZE(n); -} - -static uint8_t -xfs_dir2_data_get_ftype( - struct xfs_dir2_data_entry *dep) -{ - return XFS_DIR3_FT_UNKNOWN; -} - -static void -xfs_dir2_data_put_ftype( - struct xfs_dir2_data_entry *dep, - uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); -} - -static uint8_t -xfs_dir3_data_get_ftype( - struct xfs_dir2_data_entry *dep) -{ - uint8_t ftype = dep->name[dep->namelen]; - - if (ftype >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return ftype; -} - -static void -xfs_dir3_data_put_ftype( - struct xfs_dir2_data_entry *dep, - uint8_t type) -{ - ASSERT(type < XFS_DIR3_FT_MAX); - ASSERT(dep->namelen != 0); - - dep->name[dep->namelen] = type; -} - -/* - * Pointer to an entry's tag word. - */ -static __be16 * -xfs_dir2_data_entry_tag_p( - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -static __be16 * -xfs_dir3_data_entry_tag_p( - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * location of . and .. in data space (always block 0) - */ -static struct xfs_dir2_data_entry * -xfs_dir2_data_dot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1) + - XFS_DIR2_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_ftype_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_ftype_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_free * -xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - return hdr->bestfree; -} - -static struct xfs_dir2_data_free * -xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir3_data_hdr *)hdr)->best_free; -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_unused * -xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - -static struct xfs_dir2_data_unused * -xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - - -/* - * Directory Leaf block operations - */ -static int -xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -static struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - return lp->__ents; -} - -static int -xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -static struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - return ((struct xfs_dir3_leaf *)lp)->__ents; -} - -static void -xfs_dir2_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.count); - to->stale = be16_to_cpu(from->hdr.stale); - - ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || - to->magic == XFS_DIR2_LEAFN_MAGIC); -} - -static void -xfs_dir2_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || - from->magic == XFS_DIR2_LEAFN_MAGIC); - - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.count = cpu_to_be16(from->count); - to->hdr.stale = cpu_to_be16(from->stale); -} - -static void -xfs_dir3_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; - - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->count); - to->stale = be16_to_cpu(hdr3->stale); - - ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || - to->magic == XFS_DIR3_LEAFN_MAGIC); -} - -static void -xfs_dir3_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; - - ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || - from->magic == XFS_DIR3_LEAFN_MAGIC); - - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->count = cpu_to_be16(from->count); - hdr3->stale = cpu_to_be16(from->stale); -} - - -/* - * Directory/Attribute Node block operations - */ -static struct xfs_da_node_entry * -xfs_da2_node_tree_p(struct xfs_da_intnode *dap) -{ - return dap->__btree; -} - -static struct xfs_da_node_entry * -xfs_da3_node_tree_p(struct xfs_da_intnode *dap) -{ - return ((struct xfs_da3_intnode *)dap)->__btree; -} - -static void -xfs_da2_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.__count); - to->level = be16_to_cpu(from->hdr.__level); -} - -static void -xfs_da2_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - ASSERT(from->magic == XFS_DA_NODE_MAGIC); - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.__count = cpu_to_be16(from->count); - to->hdr.__level = cpu_to_be16(from->level); -} - -static void -xfs_da3_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; - - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->__count); - to->level = be16_to_cpu(hdr3->__level); -} - -static void -xfs_da3_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; - - ASSERT(from->magic == XFS_DA3_NODE_MAGIC); - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->__count = cpu_to_be16(from->count); - hdr3->__level = cpu_to_be16(from->level); -} - - -/* - * Directory free space block operations - */ -static int -xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -static __be16 * -xfs_dir2_free_bests_p(struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + - (db / xfs_dir2_free_max_bests(geo)); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static int -xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return db % xfs_dir2_free_max_bests(geo); -} - -static int -xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) -{ - return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -static __be16 * -xfs_dir3_free_bests_p(struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static xfs_dir2_db_t -xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + - (db / xfs_dir3_free_max_bests(geo)); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static int -xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) -{ - return db % xfs_dir3_free_max_bests(geo); -} - -static void -xfs_dir2_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - to->magic = be32_to_cpu(from->hdr.magic); - to->firstdb = be32_to_cpu(from->hdr.firstdb); - to->nvalid = be32_to_cpu(from->hdr.nvalid); - to->nused = be32_to_cpu(from->hdr.nused); - ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); -} - -static void -xfs_dir2_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); - - to->hdr.magic = cpu_to_be32(from->magic); - to->hdr.firstdb = cpu_to_be32(from->firstdb); - to->hdr.nvalid = cpu_to_be32(from->nvalid); - to->hdr.nused = cpu_to_be32(from->nused); -} - -static void -xfs_dir3_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; - - to->magic = be32_to_cpu(hdr3->hdr.magic); - to->firstdb = be32_to_cpu(hdr3->firstdb); - to->nvalid = be32_to_cpu(hdr3->nvalid); - to->nused = be32_to_cpu(hdr3->nused); - - ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); -} - -static void -xfs_dir3_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; - - ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); - - hdr3->hdr.magic = cpu_to_be32(from->magic); - hdr3->firstdb = cpu_to_be32(from->firstdb); - hdr3->nvalid = cpu_to_be32(from->nvalid); - hdr3->nused = cpu_to_be32(from->nused); -} - -static const struct xfs_dir_ops xfs_dir2_ops = { - .sf_entsize = xfs_dir2_sf_entsize, - .sf_nextentry = xfs_dir2_sf_nextentry, - .sf_get_ftype = xfs_dir2_sfe_get_ftype, - .sf_put_ftype = xfs_dir2_sfe_put_ftype, - .sf_get_ino = xfs_dir2_sfe_get_ino, - .sf_put_ino = xfs_dir2_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir2_data_entsize, - .data_get_ftype = xfs_dir2_data_get_ftype, - .data_put_ftype = xfs_dir2_data_put_ftype, - .data_entry_tag_p = xfs_dir2_data_entry_tag_p, - .data_bestfree_p = xfs_dir2_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1) + - XFS_DIR2_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), - - .data_dot_entry_p = xfs_dir2_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir2_data_first_entry_p, - .data_entry_p = xfs_dir2_data_entry_p, - .data_unused_p = xfs_dir2_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir2_max_leaf_ents, - .leaf_ents_p = xfs_dir2_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), - .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, - .free_max_bests = xfs_dir2_free_max_bests, - .free_bests_p = xfs_dir2_free_bests_p, - .db_to_fdb = xfs_dir2_db_to_fdb, - .db_to_fdindex = xfs_dir2_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir2_ftype_ops = { - .sf_entsize = xfs_dir3_sf_entsize, - .sf_nextentry = xfs_dir3_sf_nextentry, - .sf_get_ftype = xfs_dir3_sfe_get_ftype, - .sf_put_ftype = xfs_dir3_sfe_put_ftype, - .sf_get_ino = xfs_dir3_sfe_get_ino, - .sf_put_ino = xfs_dir3_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir3_data_entsize, - .data_get_ftype = xfs_dir3_data_get_ftype, - .data_put_ftype = xfs_dir3_data_put_ftype, - .data_entry_tag_p = xfs_dir3_data_entry_tag_p, - .data_bestfree_p = xfs_dir2_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), - - .data_dot_entry_p = xfs_dir2_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, - .data_entry_p = xfs_dir2_data_entry_p, - .data_unused_p = xfs_dir2_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir2_max_leaf_ents, - .leaf_ents_p = xfs_dir2_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), - .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, - .free_max_bests = xfs_dir2_free_max_bests, - .free_bests_p = xfs_dir2_free_bests_p, - .db_to_fdb = xfs_dir2_db_to_fdb, - .db_to_fdindex = xfs_dir2_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir3_ops = { - .sf_entsize = xfs_dir3_sf_entsize, - .sf_nextentry = xfs_dir3_sf_nextentry, - .sf_get_ftype = xfs_dir3_sfe_get_ftype, - .sf_put_ftype = xfs_dir3_sfe_put_ftype, - .sf_get_ino = xfs_dir3_sfe_get_ino, - .sf_put_ino = xfs_dir3_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir3_data_entsize, - .data_get_ftype = xfs_dir3_data_get_ftype, - .data_put_ftype = xfs_dir3_data_put_ftype, - .data_entry_tag_p = xfs_dir3_data_entry_tag_p, - .data_bestfree_p = xfs_dir3_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir3_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir3_data_hdr), - - .data_dot_entry_p = xfs_dir3_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir3_data_first_entry_p, - .data_entry_p = xfs_dir3_data_entry_p, - .data_unused_p = xfs_dir3_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir3_max_leaf_ents, - .leaf_ents_p = xfs_dir3_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da3_node_hdr), - .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, - .node_tree_p = xfs_da3_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir3_free_hdr), - .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, - .free_max_bests = xfs_dir3_free_max_bests, - .free_bests_p = xfs_dir3_free_bests_p, - .db_to_fdb = xfs_dir3_db_to_fdb, - .db_to_fdindex = xfs_dir3_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir2_nondir_ops = { - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, -}; - -static const struct xfs_dir_ops xfs_dir3_nondir_ops = { - .node_hdr_size = sizeof(struct xfs_da3_node_hdr), - .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, - .node_tree_p = xfs_da3_node_tree_p, -}; - -/* - * Return the ops structure according to the current config. If we are passed - * an inode, then that overrides the default config we use which is based on - * feature bits. - */ -const struct xfs_dir_ops * -xfs_dir_get_ops( - struct xfs_mount *mp, - struct xfs_inode *dp) -{ - if (dp) - return dp->d_ops; - if (mp->m_dir_inode_ops) - return mp->m_dir_inode_ops; - if (xfs_sb_version_hascrc(&mp->m_sb)) - return &xfs_dir3_ops; - if (xfs_sb_version_hasftype(&mp->m_sb)) - return &xfs_dir2_ftype_ops; - return &xfs_dir2_ops; -} - -const struct xfs_dir_ops * -xfs_nondir_get_ops( - struct xfs_mount *mp, - struct xfs_inode *dp) -{ - if (dp) - return dp->d_ops; - if (mp->m_nondir_inode_ops) - return mp->m_nondir_inode_ops; - if (xfs_sb_version_hascrc(&mp->m_sb)) - return &xfs_dir3_nondir_ops; - return &xfs_dir2_nondir_ops; -} diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index ae654e06b2fb..3dee33043e09 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -94,19 +94,6 @@ struct xfs_da3_intnode { }; /* - * In-core version of the node header to abstract the differences in the v2 and - * v3 disk format of the headers. Callers need to convert to/from disk format as - * appropriate. - */ -struct xfs_da3_icnode_hdr { - uint32_t forw; - uint32_t back; - uint16_t magic; - uint16_t count; - uint16_t level; -}; - -/* * Directory version 2. * * There are 4 possible formats: @@ -434,14 +421,6 @@ struct xfs_dir3_leaf_hdr { __be32 pad; /* 64 bit alignment */ }; -struct xfs_dir3_icleaf_hdr { - uint32_t forw; - uint32_t back; - uint16_t magic; - uint16_t count; - uint16_t stale; -}; - /* * Leaf block entry. */ @@ -482,7 +461,7 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) } /* - * Free space block defintions for the node format. + * Free space block definitions for the node format. */ /* @@ -521,19 +500,6 @@ struct xfs_dir3_free { #define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc) /* - * In core version of the free block header, abstracted away from on-disk format - * differences. Use this in the code, and convert to/from the disk version using - * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk. - */ -struct xfs_dir3_icfree_hdr { - uint32_t magic; - uint32_t firstdb; - uint32_t nvalid; - uint32_t nused; - -}; - -/* * Single block format. * * The single block format looks like the following drawing on disk: @@ -710,29 +676,6 @@ struct xfs_attr3_leafblock { }; /* - * incore, neutral version of the attribute leaf header - */ -struct xfs_attr3_icleaf_hdr { - uint32_t forw; - uint32_t back; - uint16_t magic; - uint16_t count; - uint16_t usedbytes; - /* - * firstused is 32-bit here instead of 16-bit like the on-disk variant - * to support maximum fsb size of 64k without overflow issues throughout - * the attr code. Instead, the overflow condition is handled on - * conversion to/from disk. - */ - uint32_t firstused; - __u8 holes; - struct { - uint16_t base; - uint16_t size; - } freemap[XFS_ATTR_LEAF_MAPSIZE]; -}; - -/* * Special value to represent fs block size in the leaf header firstused field. * Only used when block size overflows the 2-bytes available on disk. */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 867c5dee0751..0aa87cbde49e 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -52,7 +52,7 @@ xfs_mode_to_ftype( * ASCII case-insensitive (ie. A-Z) support for directories that was * used in IRIX. */ -STATIC xfs_dahash_t +xfs_dahash_t xfs_ascii_ci_hashname( struct xfs_name *name) { @@ -65,14 +65,14 @@ xfs_ascii_ci_hashname( return hash; } -STATIC enum xfs_dacmp +enum xfs_dacmp xfs_ascii_ci_compname( - struct xfs_da_args *args, - const unsigned char *name, - int len) + struct xfs_da_args *args, + const unsigned char *name, + int len) { - enum xfs_dacmp result; - int i; + enum xfs_dacmp result; + int i; if (args->namelen != len) return XFS_CMP_DIFFERENT; @@ -89,26 +89,16 @@ xfs_ascii_ci_compname( return result; } -static const struct xfs_nameops xfs_ascii_ci_nameops = { - .hashname = xfs_ascii_ci_hashname, - .compname = xfs_ascii_ci_compname, -}; - int xfs_da_mount( struct xfs_mount *mp) { struct xfs_da_geometry *dageo; - int nodehdr_size; ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); - mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); - mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); - - nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), KM_MAYFAIL); mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), @@ -125,6 +115,27 @@ xfs_da_mount( dageo->fsblog = mp->m_sb.sb_blocklog; dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + dageo->node_hdr_size = sizeof(struct xfs_da3_node_hdr); + dageo->leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr); + dageo->free_hdr_size = sizeof(struct xfs_dir3_free_hdr); + dageo->data_entry_offset = + sizeof(struct xfs_dir3_data_hdr); + } else { + dageo->node_hdr_size = sizeof(struct xfs_da_node_hdr); + dageo->leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr); + dageo->free_hdr_size = sizeof(struct xfs_dir2_free_hdr); + dageo->data_entry_offset = + sizeof(struct xfs_dir2_data_hdr); + } + dageo->leaf_max_ents = (dageo->blksize - dageo->leaf_hdr_size) / + sizeof(struct xfs_dir2_leaf_entry); + dageo->free_max_bests = (dageo->blksize - dageo->free_hdr_size) / + sizeof(xfs_dir2_data_off_t); + + dageo->data_first_offset = dageo->data_entry_offset + + xfs_dir2_data_entsize(mp, 1) + + xfs_dir2_data_entsize(mp, 2); /* * Now we've set up the block conversion variables, we can calculate the @@ -133,7 +144,7 @@ xfs_da_mount( dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); - dageo->node_ents = (dageo->blksize - nodehdr_size) / + dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) / (uint)sizeof(xfs_da_node_entry_t); dageo->magicpct = (dageo->blksize * 37) / 100; @@ -143,15 +154,10 @@ xfs_da_mount( dageo->fsblog = mp->m_sb.sb_blocklog; dageo->blksize = 1 << dageo->blklog; dageo->fsbcount = 1; - dageo->node_ents = (dageo->blksize - nodehdr_size) / + dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size; + dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) / (uint)sizeof(xfs_da_node_entry_t); dageo->magicpct = (dageo->blksize * 37) / 100; - - if (xfs_sb_version_hasasciici(&mp->m_sb)) - mp->m_dirnameops = &xfs_ascii_ci_nameops; - else - mp->m_dirnameops = &xfs_default_nameops; - return 0; } @@ -191,10 +197,10 @@ xfs_dir_ino_validate( { bool ino_ok = xfs_verify_dir_ino(mp, ino); - if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) { + if (XFS_IS_CORRUPT(mp, !ino_ok) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DIR_INO_VALIDATE)) { xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); - XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } return 0; @@ -262,7 +268,7 @@ xfs_dir_createname( args->name = name->name; args->namelen = name->len; args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->hashval = xfs_dir2_hashname(dp->i_mount, name); args->inumber = inum; args->dp = dp; args->total = total; @@ -358,7 +364,7 @@ xfs_dir_lookup( args->name = name->name; args->namelen = name->len; args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->hashval = xfs_dir2_hashname(dp->i_mount, name); args->dp = dp; args->whichfork = XFS_DATA_FORK; args->trans = tp; @@ -430,7 +436,7 @@ xfs_dir_removename( args->name = name->name; args->namelen = name->len; args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->hashval = xfs_dir2_hashname(dp->i_mount, name); args->inumber = ino; args->dp = dp; args->total = total; @@ -491,7 +497,7 @@ xfs_dir_replace( args->name = name->name; args->namelen = name->len; args->filetype = name->type; - args->hashval = dp->i_mount->m_dirnameops->hashname(name); + args->hashval = xfs_dir2_hashname(dp->i_mount, name); args->inumber = inum; args->dp = dp; args->total = total; @@ -600,7 +606,9 @@ xfs_dir2_isblock( if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; - if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) + if (XFS_IS_CORRUPT(args->dp->i_mount, + rval != 0 && + args->dp->i_d.di_size != args->geo->blksize)) return -EFSCORRUPTED; *vp = rval; return 0; diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index f54244779492..033777e282f2 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -18,6 +18,8 @@ struct xfs_dir2_sf_entry; struct xfs_dir2_data_hdr; struct xfs_dir2_data_entry; struct xfs_dir2_data_unused; +struct xfs_dir3_icfree_hdr; +struct xfs_dir3_icleaf_hdr; extern struct xfs_name xfs_name_dotdot; @@ -27,85 +29,6 @@ extern struct xfs_name xfs_name_dotdot; extern unsigned char xfs_mode_to_ftype(int mode); /* - * directory operations vector for encode/decode routines - */ -struct xfs_dir_ops { - int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len); - struct xfs_dir2_sf_entry * - (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep); - uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); - void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep, - uint8_t ftype); - xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep); - void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino); - xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr); - void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino); - - int (*data_entsize)(int len); - uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); - void (*data_put_ftype)(struct xfs_dir2_data_entry *dep, - uint8_t ftype); - __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep); - struct xfs_dir2_data_free * - (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr); - - xfs_dir2_data_aoff_t data_dot_offset; - xfs_dir2_data_aoff_t data_dotdot_offset; - xfs_dir2_data_aoff_t data_first_offset; - size_t data_entry_offset; - - struct xfs_dir2_data_entry * - (*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_entry * - (*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_entry * - (*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_entry * - (*data_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_unused * - (*data_unused_p)(struct xfs_dir2_data_hdr *hdr); - - int leaf_hdr_size; - void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from); - void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from); - int (*leaf_max_ents)(struct xfs_da_geometry *geo); - struct xfs_dir2_leaf_entry * - (*leaf_ents_p)(struct xfs_dir2_leaf *lp); - - int node_hdr_size; - void (*node_hdr_to_disk)(struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from); - void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from); - struct xfs_da_node_entry * - (*node_tree_p)(struct xfs_da_intnode *dap); - - int free_hdr_size; - void (*free_hdr_to_disk)(struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from); - void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from); - int (*free_max_bests)(struct xfs_da_geometry *geo); - __be16 * (*free_bests_p)(struct xfs_dir2_free *free); - xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo, - xfs_dir2_db_t db); - int (*db_to_fdindex)(struct xfs_da_geometry *geo, - xfs_dir2_db_t db); -}; - -extern const struct xfs_dir_ops * - xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); -extern const struct xfs_dir_ops * - xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); - -/* * Generic directory interface routines */ extern void xfs_dir_startup(void); @@ -124,6 +47,8 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t ino, xfs_extlen_t tot); +extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp, + xfs_ino_t inum); extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t tot); @@ -143,10 +68,7 @@ extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_buf *bp); -extern void xfs_dir2_data_freescan_int(struct xfs_da_geometry *geo, - const struct xfs_dir_ops *ops, - struct xfs_dir2_data_hdr *hdr, int *loghead); -extern void xfs_dir2_data_freescan(struct xfs_inode *dp, +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr, int *loghead); extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); @@ -324,7 +246,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) #define XFS_READDIR_BUFSIZE (32768) unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); -void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, +unsigned int xfs_dir3_data_end_offset(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr); bool xfs_dir2_namecheck(const void *name, size_t length); diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 49e4bc39e7bb..d6ced59b9567 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -123,7 +123,7 @@ xfs_dir3_block_read( struct xfs_mount *mp = dp->i_mount; int err; - err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, + err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, 0, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); @@ -172,7 +172,7 @@ xfs_dir2_block_need_space( struct xfs_dir2_data_unused *enddup = NULL; *compact = 0; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); /* * If there are stale entries we'll use one for the leaf. @@ -311,7 +311,7 @@ xfs_dir2_block_compact( * This needs to happen before the next call to use_free. */ if (needscan) - xfs_dir2_data_freescan(args->dp, hdr, needlog); + xfs_dir2_data_freescan(args->dp->i_mount, hdr, needlog); } /* @@ -355,7 +355,7 @@ xfs_dir2_block_addname( if (error) return error; - len = dp->d_ops->data_entsize(args->namelen); + len = xfs_dir2_data_entsize(dp->i_mount, args->namelen); /* * Set up pointers to parts of the block. @@ -458,7 +458,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); needscan = 0; } /* @@ -541,14 +541,14 @@ xfs_dir2_block_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype); + tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Clean up the bestfree array and log the header, tail, and entry. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(args, bp); xfs_dir2_block_log_tail(tp, bp); @@ -633,7 +633,7 @@ xfs_dir2_block_lookup( * Fill in inode number, CI name if appropriate, release the block. */ args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); + args->filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(args->trans, bp); return error; @@ -660,13 +660,11 @@ xfs_dir2_block_lookup_int( int high; /* binary search high index */ int low; /* binary search low index */ int mid; /* binary search current idx */ - xfs_mount_t *mp; /* filesystem mount point */ xfs_trans_t *tp; /* transaction pointer */ enum xfs_dacmp cmp; /* comparison result */ dp = args->dp; tp = args->trans; - mp = dp->i_mount; error = xfs_dir3_block_read(tp, dp, &bp); if (error) @@ -718,7 +716,7 @@ xfs_dir2_block_lookup_int( * and buffer. If it's the first case-insensitive match, store * the index and buffer and continue looking for an exact match. */ - cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + cmp = xfs_dir2_compname(args, dep->name, dep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; *bpp = bp; @@ -791,7 +789,8 @@ xfs_dir2_block_removename( needlog = needscan = 0; xfs_dir2_data_make_free(args, bp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog, + &needscan); /* * Fix up the block tail. */ @@ -806,7 +805,7 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(args, bp); xfs_dir3_data_check(dp, bp); @@ -864,7 +863,7 @@ xfs_dir2_block_replace( * Change the inode number to the new value. */ dep->inumber = cpu_to_be64(args->inumber); - dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype); xfs_dir2_data_log_entry(args, bp, dep); xfs_dir3_data_check(dp, bp); return 0; @@ -914,7 +913,6 @@ xfs_dir2_leaf_to_block( __be16 *tagp; /* end of entry (tag) */ int to; /* block/leaf to index */ xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; trace_xfs_dir2_leaf_to_block(args); @@ -923,8 +921,7 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf); ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || @@ -938,7 +935,7 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > args->geo->blksize) { int hdrsz; - hdrsz = dp->d_ops->data_entry_offset; + hdrsz = args->geo->data_entry_offset; bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == args->geo->blksize - hdrsz) { @@ -953,7 +950,7 @@ xfs_dir2_leaf_to_block( * Read the data block if we don't already have it, give up if it fails. */ if (!dbp) { - error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp); + error = xfs_dir3_data_read(tp, dp, args->geo->datablk, 0, &dbp); if (error) return error; } @@ -1004,9 +1001,10 @@ xfs_dir2_leaf_to_block( */ lep = xfs_dir2_block_leaf_p(btp); for (from = to = 0; from < leafhdr.count; from++) { - if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (leafhdr.ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; - lep[to++] = ents[from]; + lep[to++] = leafhdr.ents[from]; } ASSERT(to == be32_to_cpu(btp->count)); xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); @@ -1014,7 +1012,7 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(args, dbp); /* @@ -1039,47 +1037,38 @@ xfs_dir2_leaf_to_block( */ int /* error */ xfs_dir2_sf_to_block( - xfs_da_args_t *args) /* operation arguments */ + struct xfs_da_args *args) { + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); + struct xfs_da_geometry *geo = args->geo; xfs_dir2_db_t blkno; /* dir-relative block # (0) */ xfs_dir2_data_hdr_t *hdr; /* block header */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ struct xfs_buf *bp; /* block buffer */ xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ int dummy; /* trash */ xfs_dir2_data_unused_t *dup; /* unused entry pointer */ int endoffset; /* end of data objects */ int error; /* error return value */ int i; /* index */ - xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log block header */ int needscan; /* need to scan block freespc */ int newoffset; /* offset from current entry */ - int offset; /* target block offset */ + unsigned int offset = geo->data_entry_offset; xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ xfs_dir2_sf_hdr_t *sfp; /* shortform header */ __be16 *tagp; /* end of data entry */ - xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; - struct xfs_ifork *ifp; trace_xfs_dir2_sf_to_block(args); - dp = args->dp; - tp = args->trans; - mp = dp->i_mount; - ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); ASSERT(ifp->if_flags & XFS_IFINLINE); - /* - * Bomb out if the shortform directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - return -EIO; - } + ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; @@ -1123,7 +1112,7 @@ xfs_dir2_sf_to_block( * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = dp->d_ops->data_unused_p(hdr); + dup = bp->b_addr + offset; needlog = needscan = 0; error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, i, &needlog, &needscan); @@ -1146,35 +1135,37 @@ xfs_dir2_sf_to_block( be16_to_cpu(dup->length), &needlog, &needscan); if (error) goto out_free; + /* * Create entry for . */ - dep = dp->d_ops->data_dot_entry_p(hdr); + dep = bp->b_addr + offset; dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; - dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + xfs_dir2_data_put_ftype(mp, dep, XFS_DIR3_FT_DIR); + tagp = xfs_dir2_data_entry_tag_p(mp, dep); + *tagp = cpu_to_be16(offset); xfs_dir2_data_log_entry(args, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); - blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); + blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(offset)); + offset += xfs_dir2_data_entsize(mp, dep->namelen); + /* * Create entry for .. */ - dep = dp->d_ops->data_dotdot_entry_p(hdr); - dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp)); + dep = bp->b_addr + offset; + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; - dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + xfs_dir2_data_put_ftype(mp, dep, XFS_DIR3_FT_DIR); + tagp = xfs_dir2_data_entry_tag_p(mp, dep); + *tagp = cpu_to_be16(offset); xfs_dir2_data_log_entry(args, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); - blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); - offset = dp->d_ops->data_first_offset; + blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(offset)); + offset += xfs_dir2_data_entsize(mp, dep->namelen); + /* * Loop over existing entries, stuff them in. */ @@ -1183,6 +1174,7 @@ xfs_dir2_sf_to_block( sfep = NULL; else sfep = xfs_dir2_sf_firstentry(sfp); + /* * Need to preserve the existing offset values in the sf directory. * Insert holes (unused entries) where necessary. @@ -1199,40 +1191,39 @@ xfs_dir2_sf_to_block( * There should be a hole here, make one. */ if (offset < newoffset) { - dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); + dup = bp->b_addr + offset; dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = cpu_to_be16(newoffset - offset); - *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( - ((char *)dup - (char *)hdr)); + *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(offset); xfs_dir2_data_log_unused(args, bp, dup); xfs_dir2_data_freeinsert(hdr, - dp->d_ops->data_bestfree_p(hdr), - dup, &dummy); + xfs_dir2_data_bestfree_p(mp, hdr), + dup, &dummy); offset += be16_to_cpu(dup->length); continue; } /* * Copy a real entry. */ - dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); - dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep)); + dep = bp->b_addr + newoffset; + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_ino(mp, sfp, sfep)); dep->namelen = sfep->namelen; - dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep)); + xfs_dir2_data_put_ftype(mp, dep, + xfs_dir2_sf_get_ftype(mp, sfep)); memcpy(dep->name, sfep->name, dep->namelen); - tagp = dp->d_ops->data_entry_tag_p(dep); - *tagp = cpu_to_be16((char *)dep - (char *)hdr); + tagp = xfs_dir2_data_entry_tag_p(mp, dep); + *tagp = cpu_to_be16(newoffset); xfs_dir2_data_log_entry(args, bp, dep); name.name = sfep->name; name.len = sfep->namelen; - blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> - hashname(&name)); - blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( - (char *)dep - (char *)hdr)); + blp[2 + i].hashval = cpu_to_be32(xfs_dir2_hashname(mp, &name)); + blp[2 + i].address = + cpu_to_be32(xfs_dir2_byte_to_dataptr(newoffset)); offset = (int)((char *)(tagp + 1) - (char *)hdr); if (++i == sfp->count) sfep = NULL; else - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); } /* Done with the temporary buffer */ kmem_free(sfp); diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 2c79be4c3153..b9eba8213180 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -13,6 +13,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_dir2.h" +#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" @@ -23,6 +24,71 @@ static xfs_failaddr_t xfs_dir2_data_freefind_verify( struct xfs_dir2_data_unused *dup, struct xfs_dir2_data_free **bf_ent); +struct xfs_dir2_data_free * +xfs_dir2_data_bestfree_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return ((struct xfs_dir3_data_hdr *)hdr)->best_free; + return hdr->bestfree; +} + +/* + * Pointer to an entry's tag word. + */ +__be16 * +xfs_dir2_data_entry_tag_p( + struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(mp, dep->namelen) - sizeof(__be16)); +} + +uint8_t +xfs_dir2_data_get_ftype( + struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep) +{ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + uint8_t ftype = dep->name[dep->namelen]; + + if (likely(ftype < XFS_DIR3_FT_MAX)) + return ftype; + } + + return XFS_DIR3_FT_UNKNOWN; +} + +void +xfs_dir2_data_put_ftype( + struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep, + uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); + ASSERT(dep->namelen != 0); + + if (xfs_sb_version_hasftype(&mp->m_sb)) + dep->name[dep->namelen] = ftype; +} + +/* + * The number of leaf entries is limited by the size of the block and the amount + * of space used by the data entries. We don't know how much space is used by + * the data entries yet, so just ensure that the count falls somewhere inside + * the block right now. + */ +static inline unsigned int +xfs_dir2_data_max_leaf_entries( + struct xfs_da_geometry *geo) +{ + return (geo->blksize - sizeof(struct xfs_dir2_block_tail) - + geo->data_entry_offset) / + sizeof(struct xfs_dir2_leaf_entry); +} + /* * Check the consistency of the data block. * The input can also be a block-format directory. @@ -38,40 +104,27 @@ __xfs_dir3_data_check( xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ - xfs_dir2_data_unused_t *dup; /* unused entry */ - char *endp; /* end of useful data */ int freeseen; /* mask of bestfrees seen */ xfs_dahash_t hash; /* hash of current name */ int i; /* leaf index */ int lastfree; /* last entry was unused */ xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ struct xfs_mount *mp = bp->b_mount; - char *p; /* current data position */ int stale; /* count of stale leaves */ struct xfs_name name; - const struct xfs_dir_ops *ops; - struct xfs_da_geometry *geo; - - geo = mp->m_dir_geo; + unsigned int offset; + unsigned int end; + struct xfs_da_geometry *geo = mp->m_dir_geo; /* - * We can be passed a null dp here from a verifier, so we need to go the - * hard way to get them. + * If this isn't a directory, something is seriously wrong. Bail out. */ - ops = xfs_dir_get_ops(mp, dp); - - /* - * If this isn't a directory, or we don't get handed the dir ops, - * something is seriously wrong. Bail out. - */ - if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || - ops != xfs_dir_get_ops(mp, NULL)) + if (dp && !S_ISDIR(VFS_I(dp)->i_mode)) return __this_address; hdr = bp->b_addr; - p = (char *)ops->data_entry_p(hdr); + offset = geo->data_entry_offset; switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): @@ -79,15 +132,8 @@ __xfs_dir3_data_check( btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); - /* - * The number of leaf entries is limited by the size of the - * block and the amount of space used by the data entries. - * We don't know how much space is used by the data entries yet, - * so just ensure that the count falls somewhere inside the - * block right now. - */ if (be32_to_cpu(btp->count) >= - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) + xfs_dir2_data_max_leaf_entries(geo)) return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): @@ -96,14 +142,14 @@ __xfs_dir3_data_check( default: return __this_address; } - endp = xfs_dir3_data_endp(geo, hdr); - if (!endp) + end = xfs_dir3_data_end_offset(geo, hdr); + if (!end) return __this_address; /* * Account for zero bestfree entries. */ - bf = ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(mp, hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { if (bf[0].offset) @@ -128,8 +174,10 @@ __xfs_dir3_data_check( /* * Loop over the data/unused entries. */ - while (p < endp) { - dup = (xfs_dir2_data_unused_t *)p; + while (offset < end) { + struct xfs_dir2_data_unused *dup = bp->b_addr + offset; + struct xfs_dir2_data_entry *dep = bp->b_addr + offset; + /* * If it's unused, look for the space in the bestfree table. * If we find it, account for that, else make sure it @@ -140,10 +188,10 @@ __xfs_dir3_data_check( if (lastfree != 0) return __this_address; - if (endp < p + be16_to_cpu(dup->length)) + if (offset + be16_to_cpu(dup->length) > end) return __this_address; if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != - (char *)dup - (char *)hdr) + offset) return __this_address; fa = xfs_dir2_data_freefind_verify(hdr, bf, dup, &dfp); if (fa) @@ -158,7 +206,7 @@ __xfs_dir3_data_check( be16_to_cpu(bf[2].length)) return __this_address; } - p += be16_to_cpu(dup->length); + offset += be16_to_cpu(dup->length); lastfree = 1; continue; } @@ -168,17 +216,15 @@ __xfs_dir3_data_check( * in the leaf section of the block. * The linear search is crude but this is DEBUG code. */ - dep = (xfs_dir2_data_entry_t *)p; if (dep->namelen == 0) return __this_address; if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) return __this_address; - if (endp < p + ops->data_entsize(dep->namelen)) + if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end) return __this_address; - if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != - (char *)dep - (char *)hdr) + if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset) return __this_address; - if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) + if (xfs_dir2_data_get_ftype(mp, dep) >= XFS_DIR3_FT_MAX) return __this_address; count++; lastfree = 0; @@ -189,7 +235,7 @@ __xfs_dir3_data_check( ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; - hash = mp->m_dirnameops->hashname(&name); + hash = xfs_dir2_hashname(mp, &name); for (i = 0; i < be32_to_cpu(btp->count); i++) { if (be32_to_cpu(lep[i].address) == addr && be32_to_cpu(lep[i].hashval) == hash) @@ -198,7 +244,7 @@ __xfs_dir3_data_check( if (i >= be32_to_cpu(btp->count)) return __this_address; } - p += ops->data_entsize(dep->namelen); + offset += xfs_dir2_data_entsize(mp, dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. @@ -354,13 +400,13 @@ xfs_dir3_data_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mapped_bno, + unsigned int flags, struct xfs_buf **bpp) { int err; - err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, - XFS_DATA_FORK, &xfs_dir3_data_buf_ops); + err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK, + &xfs_dir3_data_buf_ops); if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; @@ -370,10 +416,10 @@ int xfs_dir3_data_readahead( struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mapped_bno) + unsigned int flags) { - return xfs_da_reada_buf(dp, bno, mapped_bno, - XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); + return xfs_da_reada_buf(dp, bno, flags, XFS_DATA_FORK, + &xfs_dir3_data_reada_buf_ops); } /* @@ -561,17 +607,16 @@ xfs_dir2_data_freeremove( * Given a data block, reconstruct its bestfree map. */ void -xfs_dir2_data_freescan_int( - struct xfs_da_geometry *geo, - const struct xfs_dir_ops *ops, - struct xfs_dir2_data_hdr *hdr, - int *loghead) +xfs_dir2_data_freescan( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr, + int *loghead) { - xfs_dir2_data_entry_t *dep; /* active data entry */ - xfs_dir2_data_unused_t *dup; /* unused data entry */ - struct xfs_dir2_data_free *bf; - char *endp; /* end of block's data */ - char *p; /* current entry pointer */ + struct xfs_da_geometry *geo = mp->m_dir_geo; + struct xfs_dir2_data_free *bf = xfs_dir2_data_bestfree_p(mp, hdr); + void *addr = hdr; + unsigned int offset = geo->data_entry_offset; + unsigned int end; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -581,79 +626,60 @@ xfs_dir2_data_freescan_int( /* * Start by clearing the table. */ - bf = ops->data_bestfree_p(hdr); memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); *loghead = 1; - /* - * Set up pointers. - */ - p = (char *)ops->data_entry_p(hdr); - endp = xfs_dir3_data_endp(geo, hdr); - /* - * Loop over the block's entries. - */ - while (p < endp) { - dup = (xfs_dir2_data_unused_t *)p; + + end = xfs_dir3_data_end_offset(geo, addr); + while (offset < end) { + struct xfs_dir2_data_unused *dup = addr + offset; + struct xfs_dir2_data_entry *dep = addr + offset; + /* * If it's a free entry, insert it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ASSERT((char *)dup - (char *)hdr == + ASSERT(offset == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); - p += be16_to_cpu(dup->length); + offset += be16_to_cpu(dup->length); + continue; } + /* * For active entries, check their tags and skip them. */ - else { - dep = (xfs_dir2_data_entry_t *)p; - ASSERT((char *)dep - (char *)hdr == - be16_to_cpu(*ops->data_entry_tag_p(dep))); - p += ops->data_entsize(dep->namelen); - } + ASSERT(offset == + be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep))); + offset += xfs_dir2_data_entsize(mp, dep->namelen); } } -void -xfs_dir2_data_freescan( - struct xfs_inode *dp, - struct xfs_dir2_data_hdr *hdr, - int *loghead) -{ - return xfs_dir2_data_freescan_int(dp->i_mount->m_dir_geo, dp->d_ops, - hdr, loghead); -} - /* * Initialize a data block at the given block number in the directory. * Give back the buffer for the created block. */ int /* error */ xfs_dir3_data_init( - xfs_da_args_t *args, /* directory operation args */ - xfs_dir2_db_t blkno, /* logical dir block number */ - struct xfs_buf **bpp) /* output block buffer */ + struct xfs_da_args *args, /* directory operation args */ + xfs_dir2_db_t blkno, /* logical dir block number */ + struct xfs_buf **bpp) /* output block buffer */ { - struct xfs_buf *bp; /* block buffer */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* unused entry pointer */ - struct xfs_dir2_data_free *bf; - int error; /* error return value */ - int i; /* bestfree index */ - xfs_mount_t *mp; /* filesystem mount point */ - xfs_trans_t *tp; /* transaction pointer */ - int t; /* temp */ - - dp = args->dp; - mp = dp->i_mount; - tp = args->trans; + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_geometry *geo = args->geo; + struct xfs_buf *bp; + struct xfs_dir2_data_hdr *hdr; + struct xfs_dir2_data_unused *dup; + struct xfs_dir2_data_free *bf; + int error; + int i; + /* * Get the buffer set up for the block. */ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), - -1, &bp, XFS_DATA_FORK); + &bp, XFS_DATA_FORK); if (error) return error; bp->b_ops = &xfs_dir3_data_buf_ops; @@ -675,8 +701,9 @@ xfs_dir3_data_init( } else hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - bf = dp->d_ops->data_bestfree_p(hdr); - bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset); + bf = xfs_dir2_data_bestfree_p(mp, hdr); + bf[0].offset = cpu_to_be16(geo->data_entry_offset); + bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { bf[i].length = 0; bf[i].offset = 0; @@ -685,13 +712,11 @@ xfs_dir3_data_init( /* * Set up an unused entry for the block's body. */ - dup = dp->d_ops->data_unused_p(hdr); + dup = bp->b_addr + geo->data_entry_offset; dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - - t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset; - bf[0].length = cpu_to_be16(t); - dup->length = cpu_to_be16(t); + dup->length = bf[0].length; *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); + /* * Log it and return it. */ @@ -710,6 +735,7 @@ xfs_dir2_data_log_entry( struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { + struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_data_hdr *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || @@ -718,7 +744,7 @@ xfs_dir2_data_log_entry( hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), - (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) - + (uint)((char *)(xfs_dir2_data_entry_tag_p(mp, dep) + 1) - (char *)hdr - 1)); } @@ -739,8 +765,7 @@ xfs_dir2_data_log_header( hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); #endif - xfs_trans_log_buf(args->trans, bp, 0, - args->dp->d_ops->data_entry_offset - 1); + xfs_trans_log_buf(args->trans, bp, 0, args->geo->data_entry_offset - 1); } /* @@ -789,11 +814,11 @@ xfs_dir2_data_make_free( { xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ - char *endptr; /* end of data area */ int needscan; /* need to regen bestfree */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *postdup; /* unused entry after us */ xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ + unsigned int end; struct xfs_dir2_data_free *bf; hdr = bp->b_addr; @@ -801,14 +826,14 @@ xfs_dir2_data_make_free( /* * Figure out where the end of the data area is. */ - endptr = xfs_dir3_data_endp(args->geo, hdr); - ASSERT(endptr != NULL); + end = xfs_dir3_data_end_offset(args->geo, hdr); + ASSERT(end != 0); /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > args->dp->d_ops->data_entry_offset) { + if (offset > args->geo->data_entry_offset) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; @@ -821,7 +846,7 @@ xfs_dir2_data_make_free( * If this isn't the end of the block, see if the entry after * us is free. */ - if ((char *)hdr + offset + len < endptr) { + if (offset + len < end) { postdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) @@ -834,7 +859,7 @@ xfs_dir2_data_make_free( * Previous and following entries are both free, * merge everything into a single free entry. */ - bf = args->dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ @@ -1025,7 +1050,7 @@ xfs_dir2_data_use_free( * Look up the entry in the bestfree table. */ oldlen = be16_to_cpu(dup->length); - bf = args->dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr); dfp = xfs_dir2_data_freefind(hdr, bf, dup); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* @@ -1149,19 +1174,22 @@ corrupt: } /* Find the end of the entry data in a data/block format dir block. */ -void * -xfs_dir3_data_endp( +unsigned int +xfs_dir3_data_end_offset( struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) { + void *p; + switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): - return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); + p = xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); + return p - (void *)hdr; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - return (char *)hdr + geo->blksize; + return geo->blksize; default: - return NULL; + return 0; } } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a53e4585a2f3..a131b520aac7 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -24,12 +24,73 @@ * Local function declarations. */ static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, - int *indexp, struct xfs_buf **dbpp); + int *indexp, struct xfs_buf **dbpp, + struct xfs_dir3_icleaf_hdr *leafhdr); static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, struct xfs_buf *bp, int first, int last); static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, struct xfs_buf *bp); +void +xfs_dir2_leaf_hdr_from_disk( + struct xfs_mount *mp, + struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_leaf *from3 = (struct xfs_dir3_leaf *)from; + + to->forw = be32_to_cpu(from3->hdr.info.hdr.forw); + to->back = be32_to_cpu(from3->hdr.info.hdr.back); + to->magic = be16_to_cpu(from3->hdr.info.hdr.magic); + to->count = be16_to_cpu(from3->hdr.count); + to->stale = be16_to_cpu(from3->hdr.stale); + to->ents = from3->__ents; + + ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || + to->magic == XFS_DIR3_LEAFN_MAGIC); + } else { + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.count); + to->stale = be16_to_cpu(from->hdr.stale); + to->ents = from->__ents; + + ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || + to->magic == XFS_DIR2_LEAFN_MAGIC); + } +} + +void +xfs_dir2_leaf_hdr_to_disk( + struct xfs_mount *mp, + struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_leaf *to3 = (struct xfs_dir3_leaf *)to; + + ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || + from->magic == XFS_DIR3_LEAFN_MAGIC); + + to3->hdr.info.hdr.forw = cpu_to_be32(from->forw); + to3->hdr.info.hdr.back = cpu_to_be32(from->back); + to3->hdr.info.hdr.magic = cpu_to_be16(from->magic); + to3->hdr.count = cpu_to_be16(from->count); + to3->hdr.stale = cpu_to_be16(from->stale); + } else { + ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || + from->magic == XFS_DIR2_LEAFN_MAGIC); + + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.count = cpu_to_be16(from->count); + to->hdr.stale = cpu_to_be16(from->stale); + } +} + /* * Check the internal consistency of a leaf1 block. * Pop an assert if something is wrong. @@ -43,7 +104,7 @@ xfs_dir3_leaf1_check( struct xfs_dir2_leaf *leaf = bp->b_addr; struct xfs_dir3_icleaf_hdr leafhdr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -52,7 +113,7 @@ xfs_dir3_leaf1_check( } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) return __this_address; - return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(dp->i_mount, &leafhdr, leaf); } static inline void @@ -76,31 +137,15 @@ xfs_dir3_leaf_check( xfs_failaddr_t xfs_dir3_leaf_check_int( - struct xfs_mount *mp, - struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, - struct xfs_dir2_leaf *leaf) + struct xfs_mount *mp, + struct xfs_dir3_icleaf_hdr *hdr, + struct xfs_dir2_leaf *leaf) { - struct xfs_dir2_leaf_entry *ents; - xfs_dir2_leaf_tail_t *ltp; - int stale; - int i; - const struct xfs_dir_ops *ops; - struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_da_geometry *geo = mp->m_dir_geo; - - /* - * we can be passed a null dp here from a verifier, so we need to go the - * hard way to get them. - */ - ops = xfs_dir_get_ops(mp, dp); + struct xfs_da_geometry *geo = mp->m_dir_geo; + xfs_dir2_leaf_tail_t *ltp; + int stale; + int i; - if (!hdr) { - ops->leaf_hdr_from_disk(&leafhdr, leaf); - hdr = &leafhdr; - } - - ents = ops->leaf_ents_p(leaf); ltp = xfs_dir2_leaf_tail_p(geo, leaf); /* @@ -108,23 +153,23 @@ xfs_dir3_leaf_check_int( * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - if (hdr->count > ops->leaf_max_ents(geo)) + if (hdr->count > geo->leaf_max_ents) return __this_address; /* Leaves and bests don't overlap in leaf format. */ if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || hdr->magic == XFS_DIR3_LEAF1_MAGIC) && - (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) + (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) return __this_address; /* Check hash value order, count stale entries. */ for (i = stale = 0; i < hdr->count; i++) { if (i + 1 < hdr->count) { - if (be32_to_cpu(ents[i].hashval) > - be32_to_cpu(ents[i + 1].hashval)) + if (be32_to_cpu(hdr->ents[i].hashval) > + be32_to_cpu(hdr->ents[i + 1].hashval)) return __this_address; } - if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (hdr->stale != stale) @@ -139,17 +184,18 @@ xfs_dir3_leaf_check_int( */ static xfs_failaddr_t xfs_dir3_leaf_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_mount; - struct xfs_dir2_leaf *leaf = bp->b_addr; - xfs_failaddr_t fa; + struct xfs_mount *mp = bp->b_mount; + struct xfs_dir3_icleaf_hdr leafhdr; + xfs_failaddr_t fa; fa = xfs_da3_blkinfo_verify(bp, bp->b_addr); if (fa) return fa; - return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); + xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, bp->b_addr); + return xfs_dir3_leaf_check_int(mp, &leafhdr, bp->b_addr); } static void @@ -216,13 +262,12 @@ xfs_dir3_leaf_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t fbno, - xfs_daddr_t mappedbno, struct xfs_buf **bpp) { int err; - err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, - XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops); + err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK, + &xfs_dir3_leaf1_buf_ops); if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF); return err; @@ -233,13 +278,12 @@ xfs_dir3_leafn_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t fbno, - xfs_daddr_t mappedbno, struct xfs_buf **bpp) { int err; - err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, - XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops); + err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK, + &xfs_dir3_leafn_buf_ops); if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF); return err; @@ -311,7 +355,7 @@ xfs_dir3_leaf_get_buf( bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), - -1, &bp, XFS_DATA_FORK); + &bp, XFS_DATA_FORK); if (error) return error; @@ -346,7 +390,6 @@ xfs_dir2_block_to_leaf( int needscan; /* need to rescan bestfree */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_dir2_data_free *bf; - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; trace_xfs_dir2_block_to_leaf(args); @@ -375,24 +418,24 @@ xfs_dir2_block_to_leaf( xfs_dir3_data_check(dp, dbp); btp = xfs_dir2_block_tail_p(args->geo, hdr); blp = xfs_dir2_block_leaf_p(btp); - bf = dp->d_ops->data_bestfree_p(hdr); - ents = dp->d_ops->leaf_ents_p(leaf); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); /* * Set the counts in the leaf header. */ - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); leafhdr.count = be32_to_cpu(btp->count); leafhdr.stale = be32_to_cpu(btp->stale); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr); xfs_dir3_leaf_log_header(args, lbp); /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. */ - memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1); + memcpy(leafhdr.ents, blp, + be32_to_cpu(btp->count) * sizeof(struct xfs_dir2_leaf_entry)); + xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, 0, leafhdr.count - 1); needscan = 0; needlog = 1; /* @@ -415,7 +458,7 @@ xfs_dir2_block_to_leaf( hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); /* * Set up leaf tail and bests table. */ @@ -594,7 +637,7 @@ xfs_dir2_leaf_addname( trace_xfs_dir2_leaf_addname(args); - error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp); if (error) return error; @@ -607,10 +650,10 @@ xfs_dir2_leaf_addname( index = xfs_dir2_leaf_search_hash(args, lbp); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); + ents = leafhdr.ents; bestsp = xfs_dir2_leaf_bests_p(ltp); - length = dp->d_ops->data_entsize(args->namelen); + length = xfs_dir2_data_entsize(dp->i_mount, args->namelen); /* * See if there are any entries with the same hash value @@ -773,7 +816,7 @@ xfs_dir2_leaf_addname( else xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); bestsp[use_block] = bf[0].length; grown = 1; } else { @@ -783,13 +826,13 @@ xfs_dir2_leaf_addname( */ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, use_block), - -1, &dbp); + 0, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; } hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); grown = 0; } /* @@ -815,14 +858,14 @@ xfs_dir2_leaf_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype); + tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); /* * Need to log the data block's header. */ @@ -852,9 +895,9 @@ xfs_dir2_leaf_addname( /* * Log the leaf fields and give up the buffers. */ - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr); xfs_dir3_leaf_log_header(args, lbp); - xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); + xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, lfloglow, lfloghigh); xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); return 0; @@ -874,7 +917,6 @@ xfs_dir3_leaf_compact( xfs_dir2_leaf_t *leaf; /* leaf structure */ int loglow; /* first leaf entry to log */ int to; /* target leaf index */ - struct xfs_dir2_leaf_entry *ents; struct xfs_inode *dp = args->dp; leaf = bp->b_addr; @@ -884,9 +926,9 @@ xfs_dir3_leaf_compact( /* * Compress out the stale entries in place. */ - ents = dp->d_ops->leaf_ents_p(leaf); for (from = to = 0, loglow = -1; from < leafhdr->count; from++) { - if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (leafhdr->ents[from].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; /* * Only actually copy the entries that are different. @@ -894,7 +936,7 @@ xfs_dir3_leaf_compact( if (from > to) { if (loglow == -1) loglow = to; - ents[to] = ents[from]; + leafhdr->ents[to] = leafhdr->ents[from]; } to++; } @@ -905,10 +947,10 @@ xfs_dir3_leaf_compact( leafhdr->count -= leafhdr->stale; leafhdr->stale = 0; - dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, leafhdr); xfs_dir3_leaf_log_header(args, bp); if (loglow != -1) - xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1); + xfs_dir3_leaf_log_ents(args, leafhdr, bp, loglow, to - 1); } /* @@ -1037,6 +1079,7 @@ xfs_dir3_leaf_log_bests( void xfs_dir3_leaf_log_ents( struct xfs_da_args *args, + struct xfs_dir3_icleaf_hdr *hdr, struct xfs_buf *bp, int first, int last) @@ -1044,16 +1087,14 @@ xfs_dir3_leaf_log_ents( xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir2_leaf_entry *ents; ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ents = args->dp->d_ops->leaf_ents_p(leaf); - firstlep = &ents[first]; - lastlep = &ents[last]; + firstlep = &hdr->ents[first]; + lastlep = &hdr->ents[last]; xfs_trans_log_buf(args->trans, bp, (uint)((char *)firstlep - (char *)leaf), (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); @@ -1076,7 +1117,7 @@ xfs_dir3_leaf_log_header( xfs_trans_log_buf(args->trans, bp, (uint)((char *)&leaf->hdr - (char *)leaf), - args->dp->d_ops->leaf_hdr_size - 1); + args->geo->leaf_hdr_size - 1); } /* @@ -1115,28 +1156,27 @@ xfs_dir2_leaf_lookup( int error; /* error return code */ int index; /* found entry index */ struct xfs_buf *lbp; /* leaf buffer */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; trace_xfs_dir2_leaf_lookup(args); /* * Look up name in the leaf block, returning both buffers and index. */ - if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { + error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr); + if (error) return error; - } + tp = args->trans; dp = args->dp; xfs_dir3_leaf_check(dp, lbp); - leaf = lbp->b_addr; - ents = dp->d_ops->leaf_ents_p(leaf); + /* * Get to the leaf entry and contained data entry address. */ - lep = &ents[index]; + lep = &leafhdr.ents[index]; /* * Point to the data entry. @@ -1148,7 +1188,7 @@ xfs_dir2_leaf_lookup( * Return the found inode number & CI name if appropriate */ args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); + args->filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); @@ -1166,7 +1206,8 @@ xfs_dir2_leaf_lookup_int( xfs_da_args_t *args, /* operation arguments */ struct xfs_buf **lbpp, /* out: leaf buffer */ int *indexp, /* out: index in leaf block */ - struct xfs_buf **dbpp) /* out: data buffer */ + struct xfs_buf **dbpp, /* out: data buffer */ + struct xfs_dir3_icleaf_hdr *leafhdr) { xfs_dir2_db_t curdb = -1; /* current data block number */ struct xfs_buf *dbp = NULL; /* data buffer */ @@ -1182,22 +1223,19 @@ xfs_dir2_leaf_lookup_int( xfs_trans_t *tp; /* transaction pointer */ xfs_dir2_db_t cidb = -1; /* case match data block no. */ enum xfs_dacmp cmp; /* name compare result */ - struct xfs_dir2_leaf_entry *ents; - struct xfs_dir3_icleaf_hdr leafhdr; dp = args->dp; tp = args->trans; mp = dp->i_mount; - error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); + error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp); if (error) return error; *lbpp = lbp; leaf = lbp->b_addr; xfs_dir3_leaf_check(dp, lbp); - ents = dp->d_ops->leaf_ents_p(leaf); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(mp, leafhdr, leaf); /* * Look for the first leaf entry with our hash value. @@ -1207,8 +1245,9 @@ xfs_dir2_leaf_lookup_int( * Loop over all the entries with the right hash value * looking to match the name. */ - for (lep = &ents[index]; - index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; + for (lep = &leafhdr->ents[index]; + index < leafhdr->count && + be32_to_cpu(lep->hashval) == args->hashval; lep++, index++) { /* * Skip over stale leaf entries. @@ -1229,7 +1268,7 @@ xfs_dir2_leaf_lookup_int( xfs_trans_brelse(tp, dbp); error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, newdb), - -1, &dbp); + 0, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -1247,7 +1286,7 @@ xfs_dir2_leaf_lookup_int( * and buffer. If it's the first case-insensitive match, store * the index and buffer and continue looking for an exact match. */ - cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + cmp = xfs_dir2_compname(args, dep->name, dep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; *indexp = index; @@ -1271,7 +1310,7 @@ xfs_dir2_leaf_lookup_int( xfs_trans_brelse(tp, dbp); error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, cidb), - -1, &dbp); + 0, &dbp); if (error) { xfs_trans_brelse(tp, lbp); return error; @@ -1297,6 +1336,7 @@ int /* error */ xfs_dir2_leaf_removename( xfs_da_args_t *args) /* operation arguments */ { + struct xfs_da_geometry *geo = args->geo; __be16 *bestsp; /* leaf block best freespace */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ @@ -1314,7 +1354,6 @@ xfs_dir2_leaf_removename( int needscan; /* need to rescan data frees */ xfs_dir2_data_off_t oldbest; /* old value of best free */ struct xfs_dir2_data_free *bf; /* bestfree table */ - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; trace_xfs_dir2_leaf_removename(args); @@ -1322,51 +1361,54 @@ xfs_dir2_leaf_removename( /* * Lookup the leaf entry, get the leaf and data blocks read in. */ - if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { + error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr); + if (error) return error; - } + dp = args->dp; leaf = lbp->b_addr; hdr = dbp->b_addr; xfs_dir3_data_check(dp, dbp); - bf = dp->d_ops->data_bestfree_p(hdr); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); + /* * Point to the leaf entry, use that to point to the data entry. */ - lep = &ents[index]; - db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + lep = &leafhdr.ents[index]; + db = xfs_dir2_dataptr_to_db(geo, be32_to_cpu(lep->address)); dep = (xfs_dir2_data_entry_t *)((char *)hdr + - xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); + xfs_dir2_dataptr_to_off(geo, be32_to_cpu(lep->address))); needscan = needlog = 0; oldbest = be16_to_cpu(bf[0].length); - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + ltp = xfs_dir2_leaf_tail_p(geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); - if (be16_to_cpu(bestsp[db]) != oldbest) + if (be16_to_cpu(bestsp[db]) != oldbest) { + xfs_buf_corruption_error(lbp); return -EFSCORRUPTED; + } /* * Mark the former data entry unused. */ xfs_dir2_data_make_free(args, dbp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog, + &needscan); /* * We just mark the leaf entry stale by putting a null in it. */ leafhdr.stale++; - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr); xfs_dir3_leaf_log_header(args, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(args, lbp, index, index); + xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, index, index); /* * Scan the freespace in the data block again if necessary, * log the data block header if necessary. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(args, dbp); /* @@ -1382,8 +1424,8 @@ xfs_dir2_leaf_removename( * If the data block is now empty then get rid of the data block. */ if (be16_to_cpu(bf[0].length) == - args->geo->blksize - dp->d_ops->data_entry_offset) { - ASSERT(db != args->geo->datablk); + geo->blksize - geo->data_entry_offset) { + ASSERT(db != geo->datablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* * Nope, can't get rid of it because it caused @@ -1425,7 +1467,7 @@ xfs_dir2_leaf_removename( /* * If the data block was not the first one, drop it. */ - else if (db != args->geo->datablk) + else if (db != geo->datablk) dbp = NULL; xfs_dir3_leaf_check(dp, lbp); @@ -1448,26 +1490,24 @@ xfs_dir2_leaf_replace( int error; /* error return code */ int index; /* index of leaf entry */ struct xfs_buf *lbp; /* leaf buffer */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; trace_xfs_dir2_leaf_replace(args); /* * Look up the entry. */ - if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { + error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr); + if (error) return error; - } + dp = args->dp; - leaf = lbp->b_addr; - ents = dp->d_ops->leaf_ents_p(leaf); /* * Point to the leaf entry, get data address from it. */ - lep = &ents[index]; + lep = &leafhdr.ents[index]; /* * Point to the data entry. */ @@ -1479,7 +1519,7 @@ xfs_dir2_leaf_replace( * Put the new inode number in, log it. */ dep->inumber = cpu_to_be64(args->inumber); - dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype); tp = args->trans; xfs_dir2_data_log_entry(args, dbp, dep); xfs_dir3_leaf_check(dp, lbp); @@ -1501,21 +1541,17 @@ xfs_dir2_leaf_search_hash( xfs_dahash_t hashwant; /* hash value looking for */ int high; /* high leaf index */ int low; /* low leaf index */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ int mid=0; /* current leaf index */ - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; - leaf = lbp->b_addr; - ents = args->dp->d_ops->leaf_ents_p(leaf); - args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(args->dp->i_mount, &leafhdr, lbp->b_addr); /* * Note, the table cannot be empty, so we have to go through the loop. * Binary search the leaf entries looking for our hash value. */ - for (lep = ents, low = 0, high = leafhdr.count - 1, + for (lep = leafhdr.ents, low = 0, high = leafhdr.count - 1, hashwant = args->hashval; low <= high; ) { mid = (low + high) >> 1; @@ -1552,6 +1588,7 @@ xfs_dir2_leaf_trim_data( struct xfs_buf *lbp, /* leaf buffer */ xfs_dir2_db_t db) /* data block number */ { + struct xfs_da_geometry *geo = args->geo; __be16 *bestsp; /* leaf bests table */ struct xfs_buf *dbp; /* data block buffer */ xfs_inode_t *dp; /* incore directory inode */ @@ -1565,23 +1602,23 @@ xfs_dir2_leaf_trim_data( /* * Read the offending data block. We need its buffer. */ - error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), - -1, &dbp); + error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(geo, db), 0, &dbp); if (error) return error; leaf = lbp->b_addr; - ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + ltp = xfs_dir2_leaf_tail_p(geo, leaf); #ifdef DEBUG { struct xfs_dir2_data_hdr *hdr = dbp->b_addr; - struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr); + struct xfs_dir2_data_free *bf = + xfs_dir2_data_bestfree_p(dp->i_mount, hdr); ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); ASSERT(be16_to_cpu(bf[0].length) == - args->geo->blksize - dp->d_ops->data_entry_offset); + geo->blksize - geo->data_entry_offset); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); } #endif @@ -1639,7 +1676,6 @@ xfs_dir2_node_to_leaf( int error; /* error return code */ struct xfs_buf *fbp; /* buffer for freespace block */ xfs_fileoff_t fo; /* freespace file offset */ - xfs_dir2_free_t *free; /* freespace structure */ struct xfs_buf *lbp; /* buffer for leaf block */ xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */ xfs_dir2_leaf_t *leaf; /* leaf structure */ @@ -1697,7 +1733,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -1708,8 +1744,7 @@ xfs_dir2_node_to_leaf( error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp); if (error) return error; - free = fbp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir2_free_hdr_from_disk(mp, &freehdr, fbp->b_addr); ASSERT(!freehdr.firstdb); @@ -1743,10 +1778,10 @@ xfs_dir2_node_to_leaf( /* * Set up the leaf bests table. */ - memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free), + memcpy(xfs_dir2_leaf_bests_p(ltp), freehdr.bests, freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir2_leaf_hdr_to_disk(mp, leaf, &leafhdr); xfs_dir3_leaf_log_header(args, lbp); xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir3_leaf_log_tail(args, lbp); diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 705c4f562758..a0cc5e240306 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -34,6 +34,25 @@ static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, int *rval); /* + * Convert data space db to the corresponding free db. + */ +static xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + + (db / geo->free_max_bests); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ + return db % geo->free_max_bests; +} + +/* * Check internal consistency of a leafn block. */ #ifdef DEBUG @@ -45,7 +64,7 @@ xfs_dir3_leafn_check( struct xfs_dir2_leaf *leaf = bp->b_addr; struct xfs_dir3_icleaf_hdr leafhdr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -54,7 +73,7 @@ xfs_dir3_leafn_check( } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) return __this_address; - return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(dp->i_mount, &leafhdr, leaf); } static inline void @@ -160,10 +179,9 @@ xfs_dir3_free_header_check( struct xfs_buf *bp) { struct xfs_mount *mp = dp->i_mount; + int maxbests = mp->m_dir_geo->free_max_bests; unsigned int firstdb; - int maxbests; - maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * maxbests; @@ -194,14 +212,14 @@ __xfs_dir3_free_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t fbno, - xfs_daddr_t mappedbno, + unsigned int flags, struct xfs_buf **bpp) { xfs_failaddr_t fa; int err; - err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, - XFS_DATA_FORK, &xfs_dir3_free_buf_ops); + err = xfs_da_read_buf(tp, dp, fbno, flags, bpp, XFS_DATA_FORK, + &xfs_dir3_free_buf_ops); if (err || !*bpp) return err; @@ -220,6 +238,58 @@ __xfs_dir3_free_read( return 0; } +void +xfs_dir2_free_hdr_from_disk( + struct xfs_mount *mp, + struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_free *from3 = (struct xfs_dir3_free *)from; + + to->magic = be32_to_cpu(from3->hdr.hdr.magic); + to->firstdb = be32_to_cpu(from3->hdr.firstdb); + to->nvalid = be32_to_cpu(from3->hdr.nvalid); + to->nused = be32_to_cpu(from3->hdr.nused); + to->bests = from3->bests; + + ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); + } else { + to->magic = be32_to_cpu(from->hdr.magic); + to->firstdb = be32_to_cpu(from->hdr.firstdb); + to->nvalid = be32_to_cpu(from->hdr.nvalid); + to->nused = be32_to_cpu(from->hdr.nused); + to->bests = from->bests; + + ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); + } +} + +static void +xfs_dir2_free_hdr_to_disk( + struct xfs_mount *mp, + struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_free *to3 = (struct xfs_dir3_free *)to; + + ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); + + to3->hdr.hdr.magic = cpu_to_be32(from->magic); + to3->hdr.firstdb = cpu_to_be32(from->firstdb); + to3->hdr.nvalid = cpu_to_be32(from->nvalid); + to3->hdr.nused = cpu_to_be32(from->nused); + } else { + ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); + + to->hdr.magic = cpu_to_be32(from->magic); + to->hdr.firstdb = cpu_to_be32(from->firstdb); + to->hdr.nvalid = cpu_to_be32(from->nvalid); + to->hdr.nused = cpu_to_be32(from->nused); + } +} + int xfs_dir2_free_read( struct xfs_trans *tp, @@ -227,7 +297,7 @@ xfs_dir2_free_read( xfs_dablk_t fbno, struct xfs_buf **bpp) { - return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp); + return __xfs_dir3_free_read(tp, dp, fbno, 0, bpp); } static int @@ -237,7 +307,7 @@ xfs_dir2_free_try_read( xfs_dablk_t fbno, struct xfs_buf **bpp) { - return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp); + return __xfs_dir3_free_read(tp, dp, fbno, XFS_DABUF_MAP_HOLE_OK, bpp); } static int @@ -254,7 +324,7 @@ xfs_dir3_free_get_buf( struct xfs_dir3_icfree_hdr hdr; error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), - -1, &bp, XFS_DATA_FORK); + &bp, XFS_DATA_FORK); if (error) return error; @@ -278,7 +348,7 @@ xfs_dir3_free_get_buf( uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid); } else hdr.magic = XFS_DIR2_FREE_MAGIC; - dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); + xfs_dir2_free_hdr_to_disk(mp, bp->b_addr, &hdr); *bpp = bp; return 0; } @@ -289,21 +359,19 @@ xfs_dir3_free_get_buf( STATIC void xfs_dir2_free_log_bests( struct xfs_da_args *args, + struct xfs_dir3_icfree_hdr *hdr, struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ { - xfs_dir2_free_t *free; /* freespace structure */ - __be16 *bests; + struct xfs_dir2_free *free = bp->b_addr; - free = bp->b_addr; - bests = args->dp->d_ops->free_bests_p(free); ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); xfs_trans_log_buf(args->trans, bp, - (uint)((char *)&bests[first] - (char *)free), - (uint)((char *)&bests[last] - (char *)free + - sizeof(bests[0]) - 1)); + (char *)&hdr->bests[first] - (char *)free, + (char *)&hdr->bests[last] - (char *)free + + sizeof(hdr->bests[0]) - 1); } /* @@ -322,7 +390,7 @@ xfs_dir2_free_log_header( free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); #endif xfs_trans_log_buf(args->trans, bp, 0, - args->dp->d_ops->free_hdr_size - 1); + args->geo->free_hdr_size - 1); } /* @@ -339,14 +407,12 @@ xfs_dir2_leaf_to_node( int error; /* error return value */ struct xfs_buf *fbp; /* freespace buffer */ xfs_dir2_db_t fdb; /* freespace block number */ - xfs_dir2_free_t *free; /* freespace structure */ __be16 *from; /* pointer to freespace entry */ int i; /* leaf freespace index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ int n; /* count of live freespc ents */ xfs_dir2_data_off_t off; /* freespace entry value */ - __be16 *to; /* pointer to freespace entry */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_dir3_icfree_hdr freehdr; @@ -368,24 +434,25 @@ xfs_dir2_leaf_to_node( if (error) return error; - free = fbp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, fbp->b_addr); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); if (be32_to_cpu(ltp->bestcount) > - (uint)dp->i_d.di_size / args->geo->blksize) + (uint)dp->i_d.di_size / args->geo->blksize) { + xfs_buf_corruption_error(lbp); return -EFSCORRUPTED; + } /* * Copy freespace entries from the leaf block to the new block. * Count active entries. */ from = xfs_dir2_leaf_bests_p(ltp); - to = dp->d_ops->free_bests_p(free); - for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { - if ((off = be16_to_cpu(*from)) != NULLDATAOFF) + for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++) { + off = be16_to_cpu(*from); + if (off != NULLDATAOFF) n++; - *to = cpu_to_be16(off); + freehdr.bests[i] = cpu_to_be16(off); } /* @@ -394,8 +461,8 @@ xfs_dir2_leaf_to_node( freehdr.nused = n; freehdr.nvalid = be32_to_cpu(ltp->bestcount); - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); + xfs_dir2_free_hdr_to_disk(dp->i_mount, fbp->b_addr, &freehdr); + xfs_dir2_free_log_bests(args, &freehdr, fbp, 0, freehdr.nvalid - 1); xfs_dir2_free_log_header(args, fbp); /* @@ -438,15 +505,17 @@ xfs_dir2_leafn_add( trace_xfs_dir2_leafn_add(args, index); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); + ents = leafhdr.ents; /* * Quick check just to make sure we are not going to index * into other peoples memory */ - if (index < 0) + if (index < 0) { + xfs_buf_corruption_error(bp); return -EFSCORRUPTED; + } /* * If there are already the maximum number of leaf entries in @@ -455,7 +524,7 @@ xfs_dir2_leafn_add( * a compact. */ - if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { + if (leafhdr.count == args->geo->leaf_max_ents) { if (!leafhdr.stale) return -ENOSPC; compact = leafhdr.stale > 1; @@ -493,9 +562,9 @@ xfs_dir2_leafn_add( lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo, args->blkno, args->index)); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr); xfs_dir3_leaf_log_header(args, bp); - xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); + xfs_dir3_leaf_log_ents(args, &leafhdr, bp, lfloglow, lfloghigh); xfs_dir3_leaf_check(dp, bp); return 0; } @@ -509,10 +578,9 @@ xfs_dir2_free_hdr_check( { struct xfs_dir3_icfree_hdr hdr; - dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); + xfs_dir2_free_hdr_from_disk(dp->i_mount, &hdr, bp->b_addr); - ASSERT((hdr.firstdb % - dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0); + ASSERT((hdr.firstdb % dp->i_mount->m_dir_geo->free_max_bests) == 0); ASSERT(hdr.firstdb <= db); ASSERT(db < hdr.firstdb + hdr.nvalid); } @@ -530,11 +598,9 @@ xfs_dir2_leaf_lasthash( struct xfs_buf *bp, /* leaf buffer */ int *count) /* count of entries in leaf */ { - struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, bp->b_addr); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC || @@ -545,9 +611,7 @@ xfs_dir2_leaf_lasthash( *count = leafhdr.count; if (!leafhdr.count) return 0; - - ents = dp->d_ops->leaf_ents_p(leaf); - return be32_to_cpu(ents[leafhdr.count - 1].hashval); + return be32_to_cpu(leafhdr.ents[leafhdr.count - 1].hashval); } /* @@ -576,15 +640,13 @@ xfs_dir2_leafn_lookup_for_addname( xfs_dir2_db_t newdb; /* new data block number */ xfs_dir2_db_t newfdb; /* new free block number */ xfs_trans_t *tp; /* transaction pointer */ - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; dp = args->dp; tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf); xfs_dir3_leaf_check(dp, bp); ASSERT(leafhdr.count > 0); @@ -604,11 +666,11 @@ xfs_dir2_leafn_lookup_for_addname( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); } - length = dp->d_ops->data_entsize(args->namelen); + length = xfs_dir2_data_entsize(mp, args->namelen); /* * Loop over leaf entries with the right hash value. */ - for (lep = &ents[index]; + for (lep = &leafhdr.ents[index]; index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; lep++, index++) { /* @@ -630,14 +692,14 @@ xfs_dir2_leafn_lookup_for_addname( * in hand, take a look at it. */ if (newdb != curdb) { - __be16 *bests; + struct xfs_dir3_icfree_hdr freehdr; curdb = newdb; /* * Convert the data block to the free block * holding its freespace information. */ - newfdb = dp->d_ops->db_to_fdb(args->geo, newdb); + newfdb = xfs_dir2_db_to_fdb(args->geo, newdb); /* * If it's not the one we have in hand, read it in. */ @@ -661,20 +723,20 @@ xfs_dir2_leafn_lookup_for_addname( /* * Get the index for our entry. */ - fi = dp->d_ops->db_to_fdindex(args->geo, curdb); + fi = xfs_dir2_db_to_fdindex(args->geo, curdb); /* * If it has room, return it. */ - bests = dp->d_ops->free_bests_p(free); - if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) { - XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", - XFS_ERRLEVEL_LOW, mp); + xfs_dir2_free_hdr_from_disk(mp, &freehdr, free); + if (XFS_IS_CORRUPT(mp, + freehdr.bests[fi] == + cpu_to_be16(NULLDATAOFF))) { if (curfdb != newfdb) xfs_trans_brelse(tp, curbp); return -EFSCORRUPTED; } curfdb = newfdb; - if (be16_to_cpu(bests[fi]) >= length) + if (be16_to_cpu(freehdr.bests[fi]) >= length) goto out; } } @@ -728,19 +790,19 @@ xfs_dir2_leafn_lookup_for_entry( xfs_dir2_db_t newdb; /* new data block number */ xfs_trans_t *tp; /* transaction pointer */ enum xfs_dacmp cmp; /* comparison result */ - struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; dp = args->dp; tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf); xfs_dir3_leaf_check(dp, bp); - if (leafhdr.count <= 0) + if (leafhdr.count <= 0) { + xfs_buf_corruption_error(bp); return -EFSCORRUPTED; + } /* * Look up the hash value in the leaf entries. @@ -756,7 +818,7 @@ xfs_dir2_leafn_lookup_for_entry( /* * Loop over leaf entries with the right hash value. */ - for (lep = &ents[index]; + for (lep = &leafhdr.ents[index]; index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; lep++, index++) { /* @@ -795,7 +857,7 @@ xfs_dir2_leafn_lookup_for_entry( error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, newdb), - -1, &curbp); + 0, &curbp); if (error) return error; } @@ -813,7 +875,7 @@ xfs_dir2_leafn_lookup_for_entry( * EEXIST immediately. If it's the first case-insensitive * match, store the block & inode number and continue looking. */ - cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + cmp = xfs_dir2_compname(args, dep->name, dep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { /* If there is a CI match block, drop it */ if (args->cmpresult != XFS_CMP_DIFFERENT && @@ -821,7 +883,7 @@ xfs_dir2_leafn_lookup_for_entry( xfs_trans_brelse(tp, state->extrablk.bp); args->cmpresult = cmp; args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); + args->filetype = xfs_dir2_data_get_ftype(mp, dep); *indexp = index; state->extravalid = 1; state->extrablk.bp = curbp; @@ -911,7 +973,7 @@ xfs_dir3_leafn_moveents( if (start_d < dhdr->count) { memmove(&dents[start_d + count], &dents[start_d], (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, bp_d, start_d + count, + xfs_dir3_leaf_log_ents(args, dhdr, bp_d, start_d + count, count + dhdr->count - 1); } /* @@ -933,7 +995,7 @@ xfs_dir3_leafn_moveents( */ memcpy(&dents[start_d], &sents[start_s], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1); + xfs_dir3_leaf_log_ents(args, dhdr, bp_d, start_d, start_d + count - 1); /* * If there are source entries after the ones we copied, @@ -942,7 +1004,8 @@ xfs_dir3_leafn_moveents( if (start_s + count < shdr->count) { memmove(&sents[start_s], &sents[start_s + count], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1); + xfs_dir3_leaf_log_ents(args, shdr, bp_s, start_s, + start_s + count - 1); } /* @@ -971,10 +1034,10 @@ xfs_dir2_leafn_order( struct xfs_dir3_icleaf_hdr hdr1; struct xfs_dir3_icleaf_hdr hdr2; - dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = dp->d_ops->leaf_ents_p(leaf1); - ents2 = dp->d_ops->leaf_ents_p(leaf2); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr1, leaf1); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr2, leaf2); + ents1 = hdr1.ents; + ents2 = hdr2.ents; if (hdr1.count > 0 && hdr2.count > 0 && (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || @@ -1024,10 +1087,10 @@ xfs_dir2_leafn_rebalance( leaf1 = blk1->bp->b_addr; leaf2 = blk2->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = dp->d_ops->leaf_ents_p(leaf1); - ents2 = dp->d_ops->leaf_ents_p(leaf2); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr1, leaf1); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr2, leaf2); + ents1 = hdr1.ents; + ents2 = hdr2.ents; oldsum = hdr1.count + hdr2.count; #if defined(DEBUG) || defined(XFS_WARN) @@ -1073,8 +1136,8 @@ xfs_dir2_leafn_rebalance( ASSERT(hdr1.stale + hdr2.stale == oldstale); /* log the changes made when moving the entries */ - dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); - dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf1, &hdr1); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf2, &hdr2); xfs_dir3_leaf_log_header(args, blk1->bp); xfs_dir3_leaf_log_header(args, blk2->bp); @@ -1120,19 +1183,17 @@ xfs_dir3_data_block_free( int longest) { int logfree = 0; - __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; struct xfs_inode *dp = args->dp; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - bests = dp->d_ops->free_bests_p(free); + xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, free); if (hdr) { /* * Data block is not empty, just set the free entry to the new * value. */ - bests[findex] = cpu_to_be16(longest); - xfs_dir2_free_log_bests(args, fbp, findex, findex); + freehdr.bests[findex] = cpu_to_be16(longest); + xfs_dir2_free_log_bests(args, &freehdr, fbp, findex, findex); return 0; } @@ -1148,18 +1209,18 @@ xfs_dir3_data_block_free( int i; /* free entry index */ for (i = findex - 1; i >= 0; i--) { - if (bests[i] != cpu_to_be16(NULLDATAOFF)) + if (freehdr.bests[i] != cpu_to_be16(NULLDATAOFF)) break; } freehdr.nvalid = i + 1; logfree = 0; } else { /* Not the last entry, just punch it out. */ - bests[findex] = cpu_to_be16(NULLDATAOFF); + freehdr.bests[findex] = cpu_to_be16(NULLDATAOFF); logfree = 1; } - dp->d_ops->free_hdr_to_disk(free, &freehdr); + xfs_dir2_free_hdr_to_disk(dp->i_mount, free, &freehdr); xfs_dir2_free_log_header(args, fbp); /* @@ -1184,7 +1245,7 @@ xfs_dir3_data_block_free( /* Log the free entry that changed, unless we got rid of it. */ if (logfree) - xfs_dir2_free_log_bests(args, fbp, findex, findex); + xfs_dir2_free_log_bests(args, &freehdr, fbp, findex, findex); return 0; } @@ -1201,6 +1262,7 @@ xfs_dir2_leafn_remove( xfs_da_state_blk_t *dblk, /* data block */ int *rval) /* resulting block needs join */ { + struct xfs_da_geometry *geo = args->geo; xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_db_t db; /* data block number */ struct xfs_buf *dbp; /* data block buffer */ @@ -1215,27 +1277,25 @@ xfs_dir2_leafn_remove( xfs_trans_t *tp; /* transaction pointer */ struct xfs_dir2_data_free *bf; /* bestfree table */ struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; trace_xfs_dir2_leafn_remove(args, index); dp = args->dp; tp = args->trans; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); /* * Point to the entry we're removing. */ - lep = &ents[index]; + lep = &leafhdr.ents[index]; /* * Extract the data block and offset from the entry. */ - db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); + db = xfs_dir2_dataptr_to_db(geo, be32_to_cpu(lep->address)); ASSERT(dblk->blkno == db); - off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)); + off = xfs_dir2_dataptr_to_off(geo, be32_to_cpu(lep->address)); ASSERT(dblk->index == off); /* @@ -1243,11 +1303,11 @@ xfs_dir2_leafn_remove( * Log the leaf block changes. */ leafhdr.stale++; - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr); xfs_dir3_leaf_log_header(args, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(args, bp, index, index); + xfs_dir3_leaf_log_ents(args, &leafhdr, bp, index, index); /* * Make the data entry free. Keep track of the longest freespace @@ -1256,17 +1316,18 @@ xfs_dir2_leafn_remove( dbp = dblk->bp; hdr = dbp->b_addr; dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); longest = be16_to_cpu(bf[0].length); needlog = needscan = 0; xfs_dir2_data_make_free(args, dbp, off, - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog, + &needscan); /* * Rescan the data block freespaces for bestfree. * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(args, dbp); xfs_dir3_data_check(dp, dbp); @@ -1285,9 +1346,8 @@ xfs_dir2_leafn_remove( * Convert the data block number to a free block, * read in the free block. */ - fdb = dp->d_ops->db_to_fdb(args->geo, db); - error = xfs_dir2_free_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fdb), + fdb = xfs_dir2_db_to_fdb(geo, db); + error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(geo, fdb), &fbp); if (error) return error; @@ -1295,23 +1355,22 @@ xfs_dir2_leafn_remove( #ifdef DEBUG { struct xfs_dir3_icfree_hdr freehdr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * - (fdb - xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET))); + + xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, free); + ASSERT(freehdr.firstdb == geo->free_max_bests * + (fdb - xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET))); } #endif /* * Calculate which entry we need to fix. */ - findex = dp->d_ops->db_to_fdindex(args->geo, db); + findex = xfs_dir2_db_to_fdindex(geo, db); longest = be16_to_cpu(bf[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ - if (longest == args->geo->blksize - - dp->d_ops->data_entry_offset) { + if (longest == geo->blksize - geo->data_entry_offset) { /* * Try to punch out the data block. */ @@ -1343,9 +1402,9 @@ xfs_dir2_leafn_remove( * Return indication of whether this leaf block is empty enough * to justify trying to join it with a neighbor. */ - *rval = (dp->d_ops->leaf_hdr_size + - (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < - args->geo->magicpct; + *rval = (geo->leaf_hdr_size + + (uint)sizeof(leafhdr.ents) * (leafhdr.count - leafhdr.stale)) < + geo->magicpct; return 0; } @@ -1444,12 +1503,12 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; leaf = blk->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf); + ents = leafhdr.ents; xfs_dir3_leaf_check(dp, blk->bp); count = leafhdr.count - leafhdr.stale; - bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); + bytes = state->args->geo->leaf_hdr_size + count * sizeof(ents[0]); if (bytes > (state->args->geo->blksize >> 1)) { /* * Blk over 50%, don't try to join. @@ -1494,8 +1553,7 @@ xfs_dir2_leafn_toosmall( /* * Read the sibling leaf block. */ - error = xfs_dir3_leafn_read(state->args->trans, dp, - blkno, -1, &bp); + error = xfs_dir3_leafn_read(state->args->trans, dp, blkno, &bp); if (error) return error; @@ -1507,8 +1565,8 @@ xfs_dir2_leafn_toosmall( (state->args->geo->blksize >> 2); leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr2, leaf); + ents = hdr2.ents; count += hdr2.count - hdr2.stale; bytes -= count * sizeof(ents[0]); @@ -1570,10 +1628,10 @@ xfs_dir2_leafn_unbalance( drop_leaf = drop_blk->bp->b_addr; save_leaf = save_blk->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); - dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); - sents = dp->d_ops->leaf_ents_p(save_leaf); - dents = dp->d_ops->leaf_ents_p(drop_leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &savehdr, save_leaf); + xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &drophdr, drop_leaf); + sents = savehdr.ents; + dents = drophdr.ents; /* * If there are any stale leaf entries, take this opportunity @@ -1599,8 +1657,8 @@ xfs_dir2_leafn_unbalance( save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval); /* log the changes made when moving the entries */ - dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); - dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, save_leaf, &savehdr); + xfs_dir2_leaf_hdr_to_disk(dp->i_mount, drop_leaf, &drophdr); xfs_dir3_leaf_log_header(args, save_blk->bp); xfs_dir3_leaf_log_header(args, drop_blk->bp); @@ -1619,19 +1677,16 @@ xfs_dir2_node_add_datablk( xfs_dir2_db_t *dbno, struct xfs_buf **dbpp, struct xfs_buf **fbpp, + struct xfs_dir3_icfree_hdr *hdr, int *findex) { struct xfs_inode *dp = args->dp; struct xfs_trans *tp = args->trans; struct xfs_mount *mp = dp->i_mount; - struct xfs_dir3_icfree_hdr freehdr; struct xfs_dir2_data_free *bf; - struct xfs_dir2_data_hdr *hdr; - struct xfs_dir2_free *free = NULL; xfs_dir2_db_t fbno; struct xfs_buf *fbp; struct xfs_buf *dbp; - __be16 *bests = NULL; int error; /* Not allowed to allocate, return failure. */ @@ -1650,7 +1705,7 @@ xfs_dir2_node_add_datablk( * Get the freespace block corresponding to the data block * that was just allocated. */ - fbno = dp->d_ops->db_to_fdb(args->geo, *dbno); + fbno = xfs_dir2_db_to_fdb(args->geo, *dbno); error = xfs_dir2_free_try_read(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), &fbp); if (error) @@ -1665,11 +1720,13 @@ xfs_dir2_node_add_datablk( if (error) return error; - if (dp->d_ops->db_to_fdb(args->geo, *dbno) != fbno) { + if (XFS_IS_CORRUPT(mp, + xfs_dir2_db_to_fdb(args->geo, *dbno) != + fbno)) { xfs_alert(mp, "%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld", __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb(args->geo, *dbno), + (long long)xfs_dir2_db_to_fdb(args->geo, *dbno), (long long)*dbno, (long long)fbno); if (fblk) { xfs_alert(mp, @@ -1679,7 +1736,6 @@ xfs_dir2_node_add_datablk( } else { xfs_alert(mp, " ... fblk is NULL"); } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } @@ -1687,44 +1743,39 @@ xfs_dir2_node_add_datablk( error = xfs_dir3_free_get_buf(args, fbno, &fbp); if (error) return error; - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir2_free_hdr_from_disk(mp, hdr, fbp->b_addr); /* Remember the first slot as our empty slot. */ - freehdr.firstdb = (fbno - xfs_dir2_byte_to_db(args->geo, + hdr->firstdb = (fbno - xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)) * - dp->d_ops->free_max_bests(args->geo); + args->geo->free_max_bests; } else { - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir2_free_hdr_from_disk(mp, hdr, fbp->b_addr); } /* Set the freespace block index from the data block number. */ - *findex = dp->d_ops->db_to_fdindex(args->geo, *dbno); + *findex = xfs_dir2_db_to_fdindex(args->geo, *dbno); /* Extend the freespace table if the new data block is off the end. */ - if (*findex >= freehdr.nvalid) { - ASSERT(*findex < dp->d_ops->free_max_bests(args->geo)); - freehdr.nvalid = *findex + 1; - bests[*findex] = cpu_to_be16(NULLDATAOFF); + if (*findex >= hdr->nvalid) { + ASSERT(*findex < args->geo->free_max_bests); + hdr->nvalid = *findex + 1; + hdr->bests[*findex] = cpu_to_be16(NULLDATAOFF); } /* * If this entry was for an empty data block (this should always be * true) then update the header. */ - if (bests[*findex] == cpu_to_be16(NULLDATAOFF)) { - freehdr.nused++; - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + if (hdr->bests[*findex] == cpu_to_be16(NULLDATAOFF)) { + hdr->nused++; + xfs_dir2_free_hdr_to_disk(mp, fbp->b_addr, hdr); xfs_dir2_free_log_header(args, fbp); } /* Update the freespace value for the new block in the table. */ - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - bests[*findex] = bf[0].length; + bf = xfs_dir2_data_bestfree_p(mp, dbp->b_addr); + hdr->bests[*findex] = bf[0].length; *dbpp = dbp; *fbpp = fbp; @@ -1737,11 +1788,10 @@ xfs_dir2_node_find_freeblk( struct xfs_da_state_blk *fblk, xfs_dir2_db_t *dbnop, struct xfs_buf **fbpp, + struct xfs_dir3_icfree_hdr *hdr, int *findexp, int length) { - struct xfs_dir3_icfree_hdr freehdr; - struct xfs_dir2_free *free = NULL; struct xfs_inode *dp = args->dp; struct xfs_trans *tp = args->trans; struct xfs_buf *fbp = NULL; @@ -1751,7 +1801,6 @@ xfs_dir2_node_find_freeblk( xfs_dir2_db_t dbno = -1; xfs_dir2_db_t fbno; xfs_fileoff_t fo; - __be16 *bests = NULL; int findex = 0; int error; @@ -1762,17 +1811,14 @@ xfs_dir2_node_find_freeblk( */ if (fblk) { fbp = fblk->bp; - free = fbp->b_addr; findex = fblk->index; + xfs_dir2_free_hdr_from_disk(dp->i_mount, hdr, fbp->b_addr); if (findex >= 0) { /* caller already found the freespace for us. */ - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - ASSERT(findex < freehdr.nvalid); - ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); - ASSERT(be16_to_cpu(bests[findex]) >= length); - dbno = freehdr.firstdb + findex; + ASSERT(findex < hdr->nvalid); + ASSERT(be16_to_cpu(hdr->bests[findex]) != NULLDATAOFF); + ASSERT(be16_to_cpu(hdr->bests[findex]) >= length); + dbno = hdr->firstdb + findex; goto found_block; } @@ -1814,15 +1860,13 @@ xfs_dir2_node_find_freeblk( if (!fbp) continue; - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir2_free_hdr_from_disk(dp->i_mount, hdr, fbp->b_addr); /* Scan the free entry array for a large enough free space. */ - for (findex = freehdr.nvalid - 1; findex >= 0; findex--) { - if (be16_to_cpu(bests[findex]) != NULLDATAOFF && - be16_to_cpu(bests[findex]) >= length) { - dbno = freehdr.firstdb + findex; + for (findex = hdr->nvalid - 1; findex >= 0; findex--) { + if (be16_to_cpu(hdr->bests[findex]) != NULLDATAOFF && + be16_to_cpu(hdr->bests[findex]) >= length) { + dbno = hdr->firstdb + findex; goto found_block; } } @@ -1838,7 +1882,6 @@ found_block: return 0; } - /* * Add the data entry for a node-format directory name addition. * The leaf entry is added in xfs_dir2_leafn_add. @@ -1853,9 +1896,9 @@ xfs_dir2_node_addname_int( struct xfs_dir2_data_entry *dep; /* data entry pointer */ struct xfs_dir2_data_hdr *hdr; /* data block header */ struct xfs_dir2_data_free *bf; - struct xfs_dir2_free *free = NULL; /* freespace block structure */ struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; + struct xfs_dir3_icfree_hdr freehdr; struct xfs_buf *dbp; /* data block buffer */ struct xfs_buf *fbp; /* freespace buffer */ xfs_dir2_data_aoff_t aoff; @@ -1867,11 +1910,10 @@ xfs_dir2_node_addname_int( int needlog = 0; /* need to log data header */ int needscan = 0; /* need to rescan data frees */ __be16 *tagp; /* data entry tag pointer */ - __be16 *bests; - length = dp->d_ops->data_entsize(args->namelen); - error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &findex, - length); + length = xfs_dir2_data_entsize(dp->i_mount, args->namelen); + error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &freehdr, + &findex, length); if (error) return error; @@ -1893,19 +1935,19 @@ xfs_dir2_node_addname_int( /* we're going to have to log the free block index later */ logfree = 1; error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp, - &findex); + &freehdr, &findex); } else { /* Read the data block in. */ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, dbno), - -1, &dbp); + 0, &dbp); } if (error) return error; /* setup for data block up now */ hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr); ASSERT(be16_to_cpu(bf[0].length) >= length); /* Point to the existing unused space. */ @@ -1926,28 +1968,26 @@ xfs_dir2_node_addname_int( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype); + tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(args, dbp, dep); /* Rescan the freespace and log the data block if needed. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog); if (needlog) xfs_dir2_data_log_header(args, dbp); /* If the freespace block entry is now wrong, update it. */ - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - if (bests[findex] != bf[0].length) { - bests[findex] = bf[0].length; + if (freehdr.bests[findex] != bf[0].length) { + freehdr.bests[findex] = bf[0].length; logfree = 1; } /* Log the freespace entry if needed. */ if (logfree) - xfs_dir2_free_log_bests(args, fbp, findex, findex); + xfs_dir2_free_log_bests(args, &freehdr, fbp, findex, findex); /* Return the data block and offset in args. */ args->blkno = (xfs_dablk_t)dbno; @@ -2155,8 +2195,6 @@ xfs_dir2_node_replace( int i; /* btree level */ xfs_ino_t inum; /* new inode number */ int ftype; /* new file type */ - xfs_dir2_leaf_t *leaf; /* leaf structure */ - xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ int rval; /* internal return value */ xfs_da_state_t *state; /* btree cursor */ @@ -2188,16 +2226,17 @@ xfs_dir2_node_replace( * and locked it. But paranoia is good. */ if (rval == -EEXIST) { - struct xfs_dir2_leaf_entry *ents; + struct xfs_dir3_icleaf_hdr leafhdr; + /* * Find the leaf entry. */ blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - leaf = blk->bp->b_addr; - ents = args->dp->d_ops->leaf_ents_p(leaf); - lep = &ents[blk->index]; ASSERT(state->extravalid); + + xfs_dir2_leaf_hdr_from_disk(state->mp, &leafhdr, + blk->bp->b_addr); /* * Point to the data entry. */ @@ -2207,13 +2246,13 @@ xfs_dir2_node_replace( dep = (xfs_dir2_data_entry_t *) ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, - be32_to_cpu(lep->address))); + be32_to_cpu(leafhdr.ents[blk->index].address))); ASSERT(inum != be64_to_cpu(dep->inumber)); /* * Fill in the new inode number and log the entry. */ dep->inumber = cpu_to_be64(inum); - args->dp->d_ops->data_put_ftype(dep, ftype); + xfs_dir2_data_put_ftype(state->mp, dep, ftype); xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); rval = 0; } @@ -2270,7 +2309,7 @@ xfs_dir2_node_trim_free( if (!bp) return 0; free = bp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, free); /* * If there are used entries, there's nothing to do. diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 59f9fb2241a5..c031c53d0f0d 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -8,7 +8,41 @@ struct dir_context; +/* + * In-core version of the leaf and free block headers to abstract the + * differences in the v2 and v3 disk format of the headers. + */ +struct xfs_dir3_icleaf_hdr { + uint32_t forw; + uint32_t back; + uint16_t magic; + uint16_t count; + uint16_t stale; + + /* + * Pointer to the on-disk format entries, which are behind the + * variable size (v4 vs v5) header in the on-disk block. + */ + struct xfs_dir2_leaf_entry *ents; +}; + +struct xfs_dir3_icfree_hdr { + uint32_t magic; + uint32_t firstdb; + uint32_t nvalid; + uint32_t nused; + + /* + * Pointer to the on-disk format entries, which are behind the + * variable size (v4 vs v5) header in the on-disk block. + */ + __be16 *bests; +}; + /* xfs_dir2.c */ +xfs_dahash_t xfs_ascii_ci_hashname(struct xfs_name *name); +enum xfs_dacmp xfs_ascii_ci_compname(struct xfs_da_args *args, + const unsigned char *name, int len); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, xfs_dir2_db_t *dbp); extern int xfs_dir_cilookup_result(struct xfs_da_args *args, @@ -26,6 +60,15 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, struct xfs_buf *lbp, struct xfs_buf *dbp); /* xfs_dir2_data.c */ +struct xfs_dir2_data_free *xfs_dir2_data_bestfree_p(struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr); +__be16 *xfs_dir2_data_entry_tag_p(struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep); +uint8_t xfs_dir2_data_get_ftype(struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep); +void xfs_dir2_data_put_ftype(struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep, uint8_t ftype); + #ifdef DEBUG extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); #else @@ -34,10 +77,10 @@ extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); -extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); -extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, - xfs_daddr_t mapped_bno); +int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp); +int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, + unsigned int flags); extern struct xfs_dir2_data_free * xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, @@ -47,10 +90,14 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, struct xfs_buf **bpp); /* xfs_dir2_leaf.c */ -extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); -extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); +void xfs_dir2_leaf_hdr_from_disk(struct xfs_mount *mp, + struct xfs_dir3_icleaf_hdr *to, struct xfs_dir2_leaf *from); +void xfs_dir2_leaf_hdr_to_disk(struct xfs_mount *mp, struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from); +int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, struct xfs_buf **bpp); +int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t fbno, struct xfs_buf **bpp); extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, struct xfs_buf *dbp); extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); @@ -62,7 +109,8 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, struct xfs_buf **bpp, uint16_t magic); extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, - struct xfs_buf *bp, int first, int last); + struct xfs_dir3_icleaf_hdr *hdr, struct xfs_buf *bp, int first, + int last); extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, struct xfs_buf *bp); extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); @@ -79,10 +127,11 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, - struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, - struct xfs_dir2_leaf *leaf); + struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ +void xfs_dir2_free_hdr_from_disk(struct xfs_mount *mp, + struct xfs_dir3_icfree_hdr *to, struct xfs_dir2_free *from); extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_buf *lbp); extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp, @@ -108,6 +157,14 @@ extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t fbno, struct xfs_buf **bpp); /* xfs_dir2_sf.c */ +xfs_ino_t xfs_dir2_sf_get_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep); +xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *hdr); +void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *hdr, xfs_ino_t ino); +uint8_t xfs_dir2_sf_get_ftype(struct xfs_mount *mp, + struct xfs_dir2_sf_entry *sfep); +struct xfs_dir2_sf_entry *xfs_dir2_sf_nextentry(struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep); extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, @@ -123,4 +180,39 @@ extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize); +static inline unsigned int +xfs_dir2_data_entsize( + struct xfs_mount *mp, + unsigned int namelen) +{ + unsigned int len; + + len = offsetof(struct xfs_dir2_data_entry, name[0]) + namelen + + sizeof(xfs_dir2_data_off_t) /* tag */; + if (xfs_sb_version_hasftype(&mp->m_sb)) + len += sizeof(uint8_t); + return round_up(len, XFS_DIR2_DATA_ALIGN); +} + +static inline xfs_dahash_t +xfs_dir2_hashname( + struct xfs_mount *mp, + struct xfs_name *name) +{ + if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb))) + return xfs_ascii_ci_hashname(name); + return xfs_da_hashname(name->name, name->len); +} + +static inline enum xfs_dacmp +xfs_dir2_compname( + struct xfs_da_args *args, + const unsigned char *name, + int len) +{ + if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb))) + return xfs_ascii_ci_compname(args, name, len); + return xfs_da_compname(args, name, len); +} + #endif /* __XFS_DIR2_PRIV_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 85f14fc2a8da..8b94d33d232f 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -37,6 +37,126 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args); static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args); +static int +xfs_dir2_sf_entsize( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + int len) +{ + int count = len; + + count += sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ + count += hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */ + + if (xfs_sb_version_hasftype(&mp->m_sb)) + count += sizeof(uint8_t); + return count; +} + +struct xfs_dir2_sf_entry * +xfs_dir2_sf_nextentry( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (void *)sfep + xfs_dir2_sf_entsize(mp, hdr, sfep->namelen); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. If the entry stores a filetype value, then it + * sits between the name and the inode number. The actual inode numbers can + * come in two formats as well, either 4 bytes or 8 bytes wide. + */ +xfs_ino_t +xfs_dir2_sf_get_ino( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + uint8_t *from = sfep->name + sfep->namelen; + + if (xfs_sb_version_hasftype(&mp->m_sb)) + from++; + + if (!hdr->i8count) + return get_unaligned_be32(from); + return get_unaligned_be64(from) & XFS_MAXINUMBER; +} + +static void +xfs_dir2_sf_put_ino( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + uint8_t *to = sfep->name + sfep->namelen; + + ASSERT(ino <= XFS_MAXINUMBER); + + if (xfs_sb_version_hasftype(&mp->m_sb)) + to++; + + if (hdr->i8count) + put_unaligned_be64(ino, to); + else + put_unaligned_be32(ino, to); +} + +xfs_ino_t +xfs_dir2_sf_get_parent_ino( + struct xfs_dir2_sf_hdr *hdr) +{ + if (!hdr->i8count) + return get_unaligned_be32(hdr->parent); + return get_unaligned_be64(hdr->parent) & XFS_MAXINUMBER; +} + +void +xfs_dir2_sf_put_parent_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino) +{ + ASSERT(ino <= XFS_MAXINUMBER); + + if (hdr->i8count) + put_unaligned_be64(ino, hdr->parent); + else + put_unaligned_be32(ino, hdr->parent); +} + +/* + * The file type field is stored at the end of the name for filetype enabled + * shortform directories, or not at all otherwise. + */ +uint8_t +xfs_dir2_sf_get_ftype( + struct xfs_mount *mp, + struct xfs_dir2_sf_entry *sfep) +{ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + uint8_t ftype = sfep->name[sfep->namelen]; + + if (ftype < XFS_DIR3_FT_MAX) + return ftype; + } + + return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_sf_put_ftype( + struct xfs_mount *mp, + struct xfs_dir2_sf_entry *sfep, + uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); + + if (xfs_sb_version_hasftype(&mp->m_sb)) + sfep->name[sfep->namelen] = ftype; +} + /* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the @@ -125,7 +245,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - dp->d_ops->sf_put_parent_ino(sfhp, parent); + xfs_dir2_sf_put_parent_ino(sfhp, parent); return size; } @@ -135,64 +255,48 @@ xfs_dir2_block_sfsize( */ int /* error */ xfs_dir2_block_to_sf( - xfs_da_args_t *args, /* operation arguments */ + struct xfs_da_args *args, /* operation arguments */ struct xfs_buf *bp, int size, /* shortform directory size */ - xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ + struct xfs_dir2_sf_hdr *sfhp) /* shortform directory hdr */ { - xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* unused data pointer */ - char *endptr; /* end of data entries */ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int error; /* error return value */ int logflags; /* inode logging flags */ - xfs_mount_t *mp; /* filesystem mount point */ - char *ptr; /* current data pointer */ - xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ - xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ + struct xfs_dir2_sf_entry *sfep; /* shortform entry */ + struct xfs_dir2_sf_hdr *sfp; /* shortform directory header */ + unsigned int offset = args->geo->data_entry_offset; + unsigned int end; trace_xfs_dir2_block_to_sf(args); - dp = args->dp; - mp = dp->i_mount; - - /* - * allocate a temporary destination buffer the size of the inode - * to format the data into. Once we have formatted the data, we - * can free the block and copy the formatted data into the inode literal - * area. - */ - dst = kmem_alloc(mp->m_sb.sb_inodesize, 0); - hdr = bp->b_addr; - /* - * Copy the header into the newly allocate local space. + * Allocate a temporary destination buffer the size of the inode to + * format the data into. Once we have formatted the data, we can free + * the block and copy the formatted data into the inode literal area. */ - sfp = (xfs_dir2_sf_hdr_t *)dst; + sfp = kmem_alloc(mp->m_sb.sb_inodesize, 0); memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); /* - * Set up to loop over the block's entries. + * Loop over the active and unused entries. Stop when we reach the + * leaf/tail portion of the block. */ - ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = xfs_dir3_data_endp(args->geo, hdr); + end = xfs_dir3_data_end_offset(args->geo, bp->b_addr); sfep = xfs_dir2_sf_firstentry(sfp); - /* - * Loop over the active and unused entries. - * Stop when we reach the leaf/tail portion of the block. - */ - while (ptr < endptr) { + while (offset < end) { + struct xfs_dir2_data_unused *dup = bp->b_addr + offset; + struct xfs_dir2_data_entry *dep = bp->b_addr + offset; + /* * If it's unused, just skip over it. */ - dup = (xfs_dir2_data_unused_t *)ptr; if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ptr += be16_to_cpu(dup->length); + offset += be16_to_cpu(dup->length); continue; } - dep = (xfs_dir2_data_entry_t *)ptr; + /* * Skip . */ @@ -204,24 +308,22 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - dp->d_ops->sf_get_parent_ino(sfp)); + xfs_dir2_sf_get_parent_ino(sfp)); /* * Normal entry, copy it into shortform. */ else { sfep->namelen = dep->namelen; - xfs_dir2_sf_put_offset(sfep, - (xfs_dir2_data_aoff_t) - ((char *)dep - (char *)hdr)); + xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, dep->name, dep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, + xfs_dir2_sf_put_ino(mp, sfp, sfep, be64_to_cpu(dep->inumber)); - dp->d_ops->sf_put_ftype(sfep, - dp->d_ops->data_get_ftype(dep)); + xfs_dir2_sf_put_ftype(mp, sfep, + xfs_dir2_data_get_ftype(mp, dep)); - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); } - ptr += dp->d_ops->data_entsize(dep->namelen); + offset += xfs_dir2_data_entsize(mp, dep->namelen); } ASSERT((char *)sfep - (char *)sfp == size); @@ -240,7 +342,7 @@ xfs_dir2_block_to_sf( * Convert the inode to local format and copy the data in. */ ASSERT(dp->i_df.if_bytes == 0); - xfs_init_local_fork(dp, XFS_DATA_FORK, dst, size); + xfs_init_local_fork(dp, XFS_DATA_FORK, sfp, size); dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; dp->i_d.di_size = size; @@ -248,7 +350,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(dst); + kmem_free(sfp); return error; } @@ -277,13 +379,7 @@ xfs_dir2_sf_addname( ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT); dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Make sure the shortform value has some of its header. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } + ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; @@ -291,7 +387,7 @@ xfs_dir2_sf_addname( /* * Compute entry (and change in) size. */ - incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); + incr_isize = xfs_dir2_sf_entsize(dp->i_mount, sfp, args->namelen); objchange = 0; /* @@ -364,18 +460,17 @@ xfs_dir2_sf_addname_easy( xfs_dir2_data_aoff_t offset, /* offset to use for new ent */ int new_isize) /* new directory size */ { + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int byteoff; /* byte offset in sf dir */ - xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; byteoff = (int)((char *)sfep - (char *)sfp); /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen), + xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. @@ -388,8 +483,8 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); + xfs_dir2_sf_put_ino(mp, sfp, sfep, args->inumber); + xfs_dir2_sf_put_ftype(mp, sfep, args->filetype); /* * Update the header and inode. @@ -416,9 +511,10 @@ xfs_dir2_sf_addname_hard( int objchange, /* changing inode number size */ int new_isize) /* new directory size */ { + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int add_datasize; /* data size need for new ent */ char *buf; /* buffer for old */ - xfs_inode_t *dp; /* incore directory inode */ int eof; /* reached end of old dir */ int nbytes; /* temp for byte copies */ xfs_dir2_data_aoff_t new_offset; /* next offset value */ @@ -432,8 +528,6 @@ xfs_dir2_sf_addname_hard( /* * Copy the old directory to the stack buffer. */ - dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_d.di_size; buf = kmem_alloc(old_isize, 0); @@ -444,13 +538,13 @@ xfs_dir2_sf_addname_hard( * to insert the new entry. * If it's going to end up at the end then oldsfep will point there. */ - for (offset = dp->d_ops->data_first_offset, + for (offset = args->geo->data_first_offset, oldsfep = xfs_dir2_sf_firstentry(oldsfp), - add_datasize = dp->d_ops->data_entsize(args->namelen), + add_datasize = xfs_dir2_data_entsize(mp, args->namelen), eof = (char *)oldsfep == &buf[old_isize]; !eof; - offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep), + offset = new_offset + xfs_dir2_data_entsize(mp, oldsfep->namelen), + oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep), eof = (char *)oldsfep == &buf[old_isize]) { new_offset = xfs_dir2_sf_get_offset(oldsfep); if (offset + add_datasize <= new_offset) @@ -479,8 +573,8 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); + xfs_dir2_sf_put_ino(mp, sfp, sfep, args->inumber); + xfs_dir2_sf_put_ftype(mp, sfep, args->filetype); sfp->count++; if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) sfp->i8count++; @@ -488,7 +582,7 @@ xfs_dir2_sf_addname_hard( * If there's more left to copy, do that. */ if (!eof) { - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); memcpy(sfep, oldsfep, old_isize - nbytes); } kmem_free(buf); @@ -510,7 +604,8 @@ xfs_dir2_sf_addname_pick( xfs_dir2_sf_entry_t **sfepp, /* out(1): new entry ptr */ xfs_dir2_data_aoff_t *offsetp) /* out(1): new offset */ { - xfs_inode_t *dp; /* incore directory inode */ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int holefit; /* found hole it will fit in */ int i; /* entry number */ xfs_dir2_data_aoff_t offset; /* data block offset */ @@ -519,11 +614,9 @@ xfs_dir2_sf_addname_pick( int size; /* entry's data size */ int used; /* data bytes used */ - dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - size = dp->d_ops->data_entsize(args->namelen); - offset = dp->d_ops->data_first_offset; + size = xfs_dir2_data_entsize(mp, args->namelen); + offset = args->geo->data_first_offset; sfep = xfs_dir2_sf_firstentry(sfp); holefit = 0; /* @@ -535,8 +628,8 @@ xfs_dir2_sf_addname_pick( if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + - dp->d_ops->data_entsize(sfep->namelen); - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + xfs_dir2_data_entsize(mp, sfep->namelen); + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); } /* * Calculate data bytes used excluding the new entry, if this @@ -578,7 +671,8 @@ static void xfs_dir2_sf_check( xfs_da_args_t *args) /* operation arguments */ { - xfs_inode_t *dp; /* incore directory inode */ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int i; /* entry number */ int i8count; /* number of big inode#s */ xfs_ino_t ino; /* entry inode number */ @@ -586,23 +680,21 @@ xfs_dir2_sf_check( xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - dp = args->dp; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - offset = dp->d_ops->data_first_offset; - ino = dp->d_ops->sf_get_parent_ino(sfp); + offset = args->geo->data_first_offset; + ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = dp->d_ops->sf_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_ino(mp, sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + - dp->d_ops->data_entsize(sfep->namelen); - ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); + xfs_dir2_data_entsize(mp, sfep->namelen); + ASSERT(xfs_dir2_sf_get_ftype(mp, sfep) < XFS_DIR3_FT_MAX); } ASSERT(i8count == sfp->i8count); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); @@ -622,22 +714,16 @@ xfs_dir2_sf_verify( struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; - const struct xfs_dir_ops *dops; struct xfs_ifork *ifp; xfs_ino_t ino; int i; int i8count; int offset; - int size; + int64_t size; int error; uint8_t filetype; ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); - /* - * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops, - * so we can only trust the mountpoint to have the right pointer. - */ - dops = xfs_dir_get_ops(mp, NULL); ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; @@ -653,12 +739,12 @@ xfs_dir2_sf_verify( endp = (char *)sfp + size; /* Check .. entry */ - ino = dops->sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) return __this_address; - offset = dops->data_first_offset; + offset = mp->m_dir_geo->data_first_offset; /* Check all reported entries */ sfep = xfs_dir2_sf_firstentry(sfp); @@ -680,7 +766,7 @@ xfs_dir2_sf_verify( * within the data buffer. The next entry starts after the * name component, so nextentry is an acceptable test. */ - next_sfep = dops->sf_nextentry(sfp, sfep); + next_sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); if (endp < (char *)next_sfep) return __this_address; @@ -689,19 +775,19 @@ xfs_dir2_sf_verify( return __this_address; /* Check the inode number. */ - ino = dops->sf_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_ino(mp, sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) return __this_address; /* Check the file type. */ - filetype = dops->sf_get_ftype(sfep); + filetype = xfs_dir2_sf_get_ftype(mp, sfep); if (filetype >= XFS_DIR3_FT_MAX) return __this_address; offset = xfs_dir2_sf_get_offset(sfep) + - dops->data_entsize(sfep->namelen); + xfs_dir2_data_entsize(mp, sfep->namelen); sfep = next_sfep; } @@ -763,7 +849,7 @@ xfs_dir2_sf_create( /* * Now can put in the inode number, since i8count is set. */ - dp->d_ops->sf_put_parent_ino(sfp, pino); + xfs_dir2_sf_put_parent_ino(sfp, pino); sfp->count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); @@ -779,7 +865,8 @@ int /* error */ xfs_dir2_sf_lookup( xfs_da_args_t *args) /* operation arguments */ { - xfs_inode_t *dp; /* incore directory inode */ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int i; /* entry index */ int error; xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ @@ -790,16 +877,9 @@ xfs_dir2_sf_lookup( trace_xfs_dir2_sf_lookup(args); xfs_dir2_sf_check(args); - dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Bail out if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } + ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; @@ -818,7 +898,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = dp->d_ops->sf_get_parent_ino(sfp); + args->inumber = xfs_dir2_sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; return -EEXIST; @@ -828,18 +908,17 @@ xfs_dir2_sf_lookup( */ ci_sfep = NULL; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode * number. If it's the first case-insensitive match, store the * inode number and continue looking for an exact match. */ - cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name, - sfep->namelen); + cmp = xfs_dir2_compname(args, sfep->name, sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); - args->filetype = dp->d_ops->sf_get_ftype(sfep); + args->inumber = xfs_dir2_sf_get_ino(mp, sfp, sfep); + args->filetype = xfs_dir2_sf_get_ftype(mp, sfep); if (cmp == XFS_CMP_EXACT) return -EEXIST; ci_sfep = sfep; @@ -864,8 +943,9 @@ int /* error */ xfs_dir2_sf_removename( xfs_da_args_t *args) { + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int byteoff; /* offset of removed entry */ - xfs_inode_t *dp; /* incore directory inode */ int entsize; /* this entry's size */ int i; /* shortform entry index */ int newsize; /* new inode size */ @@ -875,17 +955,9 @@ xfs_dir2_sf_removename( trace_xfs_dir2_sf_removename(args); - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); oldsize = (int)dp->i_d.di_size; - /* - * Bail out if the directory is way too short. - */ - if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } + ASSERT(oldsize >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; @@ -895,10 +967,10 @@ xfs_dir2_sf_removename( * Find the one we're deleting. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) == + ASSERT(xfs_dir2_sf_get_ino(mp, sfp, sfep) == args->inumber); break; } @@ -912,7 +984,7 @@ xfs_dir2_sf_removename( * Calculate sizes. */ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = dp->d_ops->sf_entsize(sfp, args->namelen); + entsize = xfs_dir2_sf_entsize(mp, sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -945,13 +1017,35 @@ xfs_dir2_sf_removename( } /* + * Check whether the sf dir replace operation need more blocks. + */ +bool +xfs_dir2_sf_replace_needblock( + struct xfs_inode *dp, + xfs_ino_t inum) +{ + int newsize; + struct xfs_dir2_sf_hdr *sfp; + + if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return false; + + sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data; + newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; + + return inum > XFS_DIR2_MAX_SHORT_INUM && + sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp); +} + +/* * Replace the inode number of an entry in a shortform directory. */ int /* error */ xfs_dir2_sf_replace( xfs_da_args_t *args) /* operation arguments */ { - xfs_inode_t *dp; /* incore directory inode */ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; int i; /* entry index */ xfs_ino_t ino=0; /* entry old inode number */ int i8elevated; /* sf_toino8 set i8count=1 */ @@ -960,16 +1054,8 @@ xfs_dir2_sf_replace( trace_xfs_dir2_sf_replace(args); - dp = args->dp; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Bail out if the shortform directory is way too small. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } + ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; @@ -980,17 +1066,14 @@ xfs_dir2_sf_replace( */ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { int error; /* error return value */ - int newsize; /* new inode size */ - newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; /* * Won't fit as shortform, convert to block then do replace. */ - if (newsize > XFS_IFORK_DSIZE(dp)) { + if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) { error = xfs_dir2_sf_to_block(args); - if (error) { + if (error) return error; - } return xfs_dir2_block_replace(args); } /* @@ -1008,22 +1091,23 @@ xfs_dir2_sf_replace( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - ino = dp->d_ops->sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); - dp->d_ops->sf_put_parent_ino(sfp, args->inumber); + xfs_dir2_sf_put_parent_ino(sfp, args->inumber); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ino = dp->d_ops->sf_get_ino(sfp, sfep); + ino = xfs_dir2_sf_get_ino(mp, sfp, sfep); ASSERT(args->inumber != ino); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); + xfs_dir2_sf_put_ino(mp, sfp, sfep, + args->inumber); + xfs_dir2_sf_put_ftype(mp, sfep, args->filetype); break; } } @@ -1076,8 +1160,9 @@ static void xfs_dir2_sf_toino4( xfs_da_args_t *args) /* operation arguments */ { + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; char *buf; /* old dir's buffer */ - xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ @@ -1088,8 +1173,6 @@ xfs_dir2_sf_toino4( trace_xfs_dir2_sf_toino4(args); - dp = args->dp; - /* * Copy the old directory to the buffer. * Then nuke it from the inode, and add the new buffer to the inode. @@ -1116,21 +1199,22 @@ xfs_dir2_sf_toino4( */ sfp->count = oldsfp->count; sfp->i8count = 0; - dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { + i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep), + oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; memcpy(sfep->offset, oldsfep->offset, sizeof(sfep->offset)); memcpy(sfep->name, oldsfep->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - dp->d_ops->sf_get_ino(oldsfp, oldsfep)); - dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); + xfs_dir2_sf_put_ino(mp, sfp, sfep, + xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep)); + xfs_dir2_sf_put_ftype(mp, sfep, + xfs_dir2_sf_get_ftype(mp, oldsfep)); } /* * Clean up the inode. @@ -1149,8 +1233,9 @@ static void xfs_dir2_sf_toino8( xfs_da_args_t *args) /* operation arguments */ { + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; char *buf; /* old dir's buffer */ - xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ @@ -1161,8 +1246,6 @@ xfs_dir2_sf_toino8( trace_xfs_dir2_sf_toino8(args); - dp = args->dp; - /* * Copy the old directory to the buffer. * Then nuke it from the inode, and add the new buffer to the inode. @@ -1189,21 +1272,22 @@ xfs_dir2_sf_toino8( */ sfp->count = oldsfp->count; sfp->i8count = 1; - dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { + i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep), + oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; memcpy(sfep->offset, oldsfep->offset, sizeof(sfep->offset)); memcpy(sfep->name, oldsfep->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - dp->d_ops->sf_get_ino(oldsfp, oldsfep)); - dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); + xfs_dir2_sf_put_ino(mp, sfp, sfep, + xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep)); + xfs_dir2_sf_put_ftype(mp, sfep, + xfs_dir2_sf_get_ftype(mp, oldsfep)); } /* * Clean up the inode. diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index e8bd688a4073..bedc1e752b60 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -35,10 +35,10 @@ xfs_calc_dquots_per_chunk( xfs_failaddr_t xfs_dquot_verify( - struct xfs_mount *mp, - xfs_disk_dquot_t *ddq, - xfs_dqid_t id, - uint type) /* used only during quotacheck */ + struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, + xfs_dqid_t id, + uint type) /* used only during quotacheck */ { /* * We can encounter an uninitialized dquot buffer for 2 reasons: diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index c968b60cee15..1b7dcbae051c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -920,13 +920,13 @@ static inline uint xfs_dinode_size(int version) * This enum is used in string mapping in xfs_trace.h; please keep the * TRACE_DEFINE_ENUMs for it up to date. */ -typedef enum xfs_dinode_fmt { +enum xfs_dinode_fmt { XFS_DINODE_FMT_DEV, /* xfs_dev_t */ XFS_DINODE_FMT_LOCAL, /* bulk data */ XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ XFS_DINODE_FMT_UUID /* added long ago, but never used */ -} xfs_dinode_fmt_t; +}; #define XFS_INODE_FORMAT_STR \ { XFS_DINODE_FMT_DEV, "dev" }, \ @@ -1144,11 +1144,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) /* * This is the main portion of the on-disk representation of quota - * information for a user. This is the q_core of the xfs_dquot_t that + * information for a user. This is the q_core of the struct xfs_dquot that * is kept in kernel memory. We pad this with some more expansion room * to construct the on disk structure. */ -typedef struct xfs_disk_dquot { +struct xfs_disk_dquot { __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ __u8 d_version; /* dquot version */ __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ @@ -1171,15 +1171,15 @@ typedef struct xfs_disk_dquot { __be32 d_rtbtimer; /* similar to above; for RT disk blocks */ __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */ __be16 d_pad; -} xfs_disk_dquot_t; +}; /* * This is what goes on disk. This is separated from the xfs_disk_dquot because * carrying the unnecessary padding would be a waste of memory. */ typedef struct xfs_dqblk { - xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ - char dd_fill[4]; /* filling for posterity */ + struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */ + char dd_fill[4];/* filling for posterity */ /* * These two are only present on filesystems with the CRC bits set. diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index e9371a8e0e26..ef95ca07d084 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -324,7 +324,7 @@ typedef struct xfs_growfs_rt { * Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE */ typedef struct xfs_bstime { - time_t tv_sec; /* seconds */ + __kernel_long_t tv_sec; /* seconds */ __s32 tv_nsec; /* and nanoseconds */ } xfs_bstime_t; @@ -416,7 +416,7 @@ struct xfs_bulkstat { /* * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was choosen + * and using two 16bit values to hold new 32bit projid was chosen * to retain compatibility with "old" filesystems). */ static inline uint32_t diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 588d44613094..988cde7744e6 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -544,7 +544,10 @@ xfs_inobt_insert_sprec( nrec->ir_free, &i); if (error) goto error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error; + } goto out; } @@ -557,17 +560,23 @@ xfs_inobt_insert_sprec( error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); - XFS_WANT_CORRUPTED_GOTO(mp, - rec.ir_startino == nrec->ir_startino, - error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error; + } + if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) { + error = -EFSCORRUPTED; + goto error; + } /* * This should never fail. If we have coexisting records that * cannot merge, something is seriously wrong. */ - XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec), - error); + if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) { + error = -EFSCORRUPTED; + goto error; + } trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino, rec.ir_holemask, nrec->ir_startino, @@ -1057,7 +1066,8 @@ xfs_ialloc_next_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; } return 0; @@ -1081,7 +1091,8 @@ xfs_ialloc_get_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; } return 0; @@ -1161,12 +1172,18 @@ xfs_dialloc_ag_inobt( error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0); + if (XFS_IS_CORRUPT(mp, j != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if (rec.ir_freecount > 0) { /* @@ -1321,19 +1338,28 @@ xfs_dialloc_ag_inobt( error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } for (;;) { error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } if (rec.ir_freecount > 0) break; error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } } alloc_inode: @@ -1393,7 +1419,8 @@ xfs_dialloc_ag_finobt_near( error = xfs_inobt_get_rec(lcur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) + return -EFSCORRUPTED; /* * See if we've landed in the parent inode record. The finobt @@ -1416,10 +1443,16 @@ xfs_dialloc_ag_finobt_near( error = xfs_inobt_get_rec(rcur, &rrec, &j); if (error) goto error_rcur; - XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur); + if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) { + error = -EFSCORRUPTED; + goto error_rcur; + } } - XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur); + if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) { + error = -EFSCORRUPTED; + goto error_rcur; + } if (i == 1 && j == 1) { /* * Both the left and right records are valid. Choose the closer @@ -1472,7 +1505,8 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; return 0; } } @@ -1483,12 +1517,14 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; return 0; } @@ -1510,20 +1546,24 @@ xfs_dialloc_ag_update_inobt( error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; error = xfs_inobt_get_rec(cur, &rec, &i); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + return -EFSCORRUPTED; ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) && - (rec.ir_freecount == frec->ir_freecount)); + if (XFS_IS_CORRUPT(cur->bc_mp, + rec.ir_free != frec->ir_free || + rec.ir_freecount != frec->ir_freecount)) + return -EFSCORRUPTED; return xfs_inobt_update(cur, &rec); } @@ -1933,14 +1973,20 @@ xfs_difree_inobt( __func__, error); goto error0; } - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } error = xfs_inobt_get_rec(cur, &rec, &i); if (error) { xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", __func__, error); goto error0; } - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error0; + } /* * Get the offset in the inode chunk. */ @@ -2052,7 +2098,10 @@ xfs_difree_finobt( * freed an inode in a previously fully allocated chunk. If not, * something is out of sync. */ - XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error); + if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) { + error = -EFSCORRUPTED; + goto error; + } error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask, ibtrec->ir_count, @@ -2075,14 +2124,20 @@ xfs_difree_finobt( error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto error; + } rec.ir_free |= XFS_INOBT_MASK(offset); rec.ir_freecount++; - XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) && - (rec.ir_freecount == ibtrec->ir_freecount), - error); + if (XFS_IS_CORRUPT(mp, + rec.ir_free != ibtrec->ir_free || + rec.ir_freecount != ibtrec->ir_freecount)) { + error = -EFSCORRUPTED; + goto error; + } /* * The content of inobt records should always match between the inobt diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 7bc87408f1a0..52451809c478 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -596,7 +596,7 @@ xfs_iext_realloc_root( struct xfs_ifork *ifp, struct xfs_iext_cursor *cur) { - size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec); + int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec); void *new; /* account for the prev/next pointers */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 28ab3c5255e1..8afacfe4be0a 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -213,13 +213,12 @@ xfs_inode_from_disk( to->di_version = from->di_version; if (to->di_version == 1) { set_nlink(inode, be16_to_cpu(from->di_onlink)); - to->di_projid_lo = 0; - to->di_projid_hi = 0; + to->di_projid = 0; to->di_version = 2; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); - to->di_projid_lo = be16_to_cpu(from->di_projid_lo); - to->di_projid_hi = be16_to_cpu(from->di_projid_hi); + to->di_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | + be16_to_cpu(from->di_projid_lo); } to->di_format = from->di_format; @@ -256,8 +255,8 @@ xfs_inode_from_disk( if (to->di_version == 3) { inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); - to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); - to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); + to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec); + to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); to->di_cowextsize = be32_to_cpu(from->di_cowextsize); } @@ -279,8 +278,8 @@ xfs_inode_to_disk( to->di_format = from->di_format; to->di_uid = cpu_to_be32(from->di_uid); to->di_gid = cpu_to_be32(from->di_gid); - to->di_projid_lo = cpu_to_be16(from->di_projid_lo); - to->di_projid_hi = cpu_to_be16(from->di_projid_hi); + to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff); + to->di_projid_hi = cpu_to_be16(from->di_projid >> 16); memset(to->di_pad, 0, sizeof(to->di_pad)); to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec); @@ -306,8 +305,8 @@ xfs_inode_to_disk( if (from->di_version == 3) { to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); - to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); - to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); + to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec); + to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(ip->i_ino); @@ -632,8 +631,6 @@ xfs_iread( if ((iget_flags & XFS_IGET_CREATE) && xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { - /* initialise the on-disk inode core */ - memset(&ip->i_d, 0, sizeof(ip->i_d)); VFS_I(ip)->i_generation = prandom_u32(); ip->i_d.di_version = 3; return 0; diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index ab0f84165317..fd94b1078722 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -21,8 +21,7 @@ struct xfs_icdinode { uint16_t di_flushiter; /* incremented on flush */ uint32_t di_uid; /* owner's user id */ uint32_t di_gid; /* owner's group id */ - uint16_t di_projid_lo; /* lower part of owner's project id */ - uint16_t di_projid_hi; /* higher part of owner's project id */ + uint32_t di_projid; /* owner's project id */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ @@ -37,7 +36,7 @@ struct xfs_icdinode { uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ - xfs_ictimestamp_t di_crtime; /* time created */ + struct timespec64 di_crtime; /* time created */ }; /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index c643beeb5a24..ad2b9c313fd2 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -75,11 +75,15 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); break; default: + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, + dip, sizeof(*dip), __this_address); return -EFSCORRUPTED; } break; default: + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, + sizeof(*dip), __this_address); return -EFSCORRUPTED; } if (error) @@ -110,14 +114,16 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); break; default: + xfs_inode_verifier_error(ip, error, __func__, dip, + sizeof(*dip), __this_address); error = -EFSCORRUPTED; break; } if (error) { - kmem_zone_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_zone, ip->i_afp); ip->i_afp = NULL; if (ip->i_cowfp) - kmem_zone_free(xfs_ifork_zone, ip->i_cowfp); + kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); ip->i_cowfp = NULL; xfs_idestroy_fork(ip, XFS_DATA_FORK); } @@ -129,7 +135,7 @@ xfs_init_local_fork( struct xfs_inode *ip, int whichfork, const void *data, - int size) + int64_t size) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); int mem_size = size, real_size = 0; @@ -467,11 +473,11 @@ xfs_iroot_realloc( void xfs_idata_realloc( struct xfs_inode *ip, - int byte_diff, + int64_t byte_diff, int whichfork) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - int new_size = (int)ifp->if_bytes + byte_diff; + int64_t new_size = ifp->if_bytes + byte_diff; ASSERT(new_size >= 0); ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork)); @@ -525,10 +531,10 @@ xfs_idestroy_fork( } if (whichfork == XFS_ATTR_FORK) { - kmem_zone_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_zone, ip->i_afp); ip->i_afp = NULL; } else if (whichfork == XFS_COW_FORK) { - kmem_zone_free(xfs_ifork_zone, ip->i_cowfp); + kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); ip->i_cowfp = NULL; } } @@ -552,7 +558,7 @@ xfs_iextents_copy( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_iext_cursor icur; struct xfs_bmbt_irec rec; - int copied = 0; + int64_t copied = 0; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); ASSERT(ifp->if_bytes > 0); diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 00c62ce170d0..500333d0101e 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -13,16 +13,16 @@ struct xfs_dinode; * File incore extent information, present for each of data & attr forks. */ struct xfs_ifork { - int if_bytes; /* bytes in if_u1 */ - unsigned int if_seq; /* fork mod counter */ + int64_t if_bytes; /* bytes in if_u1 */ struct xfs_btree_block *if_broot; /* file's incore btree root */ - short if_broot_bytes; /* bytes allocated for root */ - unsigned char if_flags; /* per-fork flags */ + unsigned int if_seq; /* fork mod counter */ int if_height; /* height of the extent tree */ union { void *if_root; /* extent tree root */ char *if_data; /* inline file data */ } if_u1; + short if_broot_bytes; /* bytes allocated for root */ + unsigned char if_flags; /* per-fork flags */ }; /* @@ -87,18 +87,24 @@ struct xfs_ifork { #define XFS_IFORK_MAXEXT(ip, w) \ (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) +#define xfs_ifork_has_extents(ip, w) \ + (XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_EXTENTS || \ + XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_BTREE) + struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); -void xfs_idata_realloc(struct xfs_inode *, int, int); +void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff, + int whichfork); void xfs_iroot_realloc(struct xfs_inode *, int, int); int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *, int); -void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); +void xfs_init_local_fork(struct xfs_inode *ip, int whichfork, + const void *data, int64_t size); xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp); void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur, diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index e5f97c69b320..8ef31d71a9c7 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -432,9 +432,9 @@ static inline uint xfs_log_dinode_size(int version) } /* - * Buffer Log Format defintions + * Buffer Log Format definitions * - * These are the physical dirty bitmap defintions for the log format structure. + * These are the physical dirty bitmap definitions for the log format structure. */ #define XFS_BLF_CHUNK 128 #define XFS_BLF_SHIFT 7 diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index f3d18eaecebb..3bf671637a91 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -30,14 +30,14 @@ typedef struct xlog_recover_item { xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ } xlog_recover_item_t; -typedef struct xlog_recover { +struct xlog_recover { struct hlist_node r_list; xlog_tid_t r_log_tid; /* log's transaction id */ xfs_trans_header_t r_theader; /* trans header for partial */ int r_state; /* not needed */ xfs_lsn_t r_lsn; /* xact lsn */ struct list_head r_itemq; /* q for items */ -} xlog_recover_t; +}; #define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 9a7fadb1361c..d7d702ee4d1a 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -200,7 +200,10 @@ xfs_refcount_insert( error = xfs_btree_insert(cur, i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } out_error: if (error) @@ -227,10 +230,16 @@ xfs_refcount_delete( error = xfs_refcount_get_rec(cur, &irec, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); error = xfs_btree_delete(cur, i); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (error) goto out_error; error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); @@ -349,7 +358,10 @@ xfs_refcount_split_extent( error = xfs_refcount_get_rec(cur, &rcext, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; @@ -371,7 +383,10 @@ xfs_refcount_split_extent( error = xfs_refcount_insert(cur, &tmp, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } return error; out_error: @@ -410,19 +425,27 @@ xfs_refcount_merge_center_extents( &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (center->rc_refcount > 1) { error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } /* Enlarge the left extent. */ @@ -430,7 +453,10 @@ xfs_refcount_merge_center_extents( &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } left->rc_blockcount = extlen; error = xfs_refcount_update(cur, left); @@ -469,14 +495,18 @@ xfs_refcount_merge_left_extent( &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } /* Enlarge the left extent. */ @@ -484,7 +514,10 @@ xfs_refcount_merge_left_extent( &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } left->rc_blockcount += cleft->rc_blockcount; error = xfs_refcount_update(cur, left); @@ -526,14 +559,18 @@ xfs_refcount_merge_right_extent( &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } /* Enlarge the right extent. */ @@ -541,7 +578,10 @@ xfs_refcount_merge_right_extent( &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } right->rc_startblock -= cright->rc_blockcount; right->rc_blockcount += cright->rc_blockcount; @@ -587,7 +627,10 @@ xfs_refcount_find_left_extents( error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (xfs_refc_next(&tmp) != agbno) return 0; @@ -605,8 +648,10 @@ xfs_refcount_find_left_extents( error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) @@ -671,7 +716,10 @@ xfs_refcount_find_right_extents( error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (tmp.rc_startblock != agbno + aglen) return 0; @@ -689,8 +737,10 @@ xfs_refcount_find_right_extents( error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, - out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) @@ -913,8 +963,11 @@ xfs_refcount_adjust_extents( &found_tmp); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - found_tmp == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, + found_tmp != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } cur->bc_private.a.priv.refc.nr_ops++; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, @@ -955,8 +1008,10 @@ xfs_refcount_adjust_extents( error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } cur->bc_private.a.priv.refc.nr_ops++; goto advloop; } else { @@ -1122,7 +1177,7 @@ xfs_refcount_finish_one( XFS_ALLOC_FLAG_FREEING, &agbp); if (error) return error; - if (!agbp) + if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) return -EFSCORRUPTED; rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); @@ -1272,7 +1327,10 @@ xfs_refcount_find_shared( error = xfs_refcount_get_rec(cur, &tmp, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { @@ -1284,7 +1342,10 @@ xfs_refcount_find_shared( error = xfs_refcount_get_rec(cur, &tmp, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } /* If the extent starts after the range we want, bail out */ @@ -1312,7 +1373,10 @@ xfs_refcount_find_shared( error = xfs_refcount_get_rec(cur, &tmp, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; @@ -1413,8 +1477,11 @@ xfs_refcount_adjust_cow_extents( switch (adj) { case XFS_REFCOUNT_ADJUST_COW_ALLOC: /* Adding a CoW reservation, there should be nothing here. */ - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - ext.rc_startblock >= agbno + aglen, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, + agbno + aglen > ext.rc_startblock)) { + error = -EFSCORRUPTED; + goto out_error; + } tmp.rc_startblock = agbno; tmp.rc_blockcount = aglen; @@ -1426,17 +1493,25 @@ xfs_refcount_adjust_cow_extents( &found_tmp); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - found_tmp == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } break; case XFS_REFCOUNT_ADJUST_COW_FREE: /* Removing a CoW reservation, there should be one extent. */ - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - ext.rc_startblock == agbno, out_error); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - ext.rc_blockcount == aglen, out_error); - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - ext.rc_refcount == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { + error = -EFSCORRUPTED; + goto out_error; + } + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { + error = -EFSCORRUPTED; + goto out_error; + } + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } ext.rc_refcount = 0; trace_xfs_refcount_modify_extent(cur->bc_mp, @@ -1444,8 +1519,10 @@ xfs_refcount_adjust_cow_extents( error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, - found_rec == 1, out_error); + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } break; default: ASSERT(0); @@ -1584,14 +1661,15 @@ struct xfs_refcount_recovery { /* Stuff an extent on the recovery list. */ STATIC int xfs_refcount_recover_extent( - struct xfs_btree_cur *cur, + struct xfs_btree_cur *cur, union xfs_btree_rec *rec, void *priv) { struct list_head *debris = priv; struct xfs_refcount_recovery *rr; - if (be32_to_cpu(rec->refc.rc_refcount) != 1) + if (XFS_IS_CORRUPT(cur->bc_mp, + be32_to_cpu(rec->refc.rc_refcount) != 1)) return -EFSCORRUPTED; rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 38e9414878b3..ff9412f113c4 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -113,7 +113,10 @@ xfs_rmap_insert( error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done); + if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } rcur->bc_rec.r.rm_startblock = agbno; rcur->bc_rec.r.rm_blockcount = len; @@ -123,7 +126,10 @@ xfs_rmap_insert( error = xfs_btree_insert(rcur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done); + if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } done: if (error) trace_xfs_rmap_insert_error(rcur->bc_mp, @@ -149,12 +155,18 @@ xfs_rmap_delete( error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done); + if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_delete(rcur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done); + if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } done: if (error) trace_xfs_rmap_delete_error(rcur->bc_mp, @@ -406,24 +418,39 @@ xfs_rmap_free_check_owner( return 0; /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + if (XFS_IS_CORRUPT(mp, + (flags & XFS_RMAP_UNWRITTEN) != + (rec->rm_flags & XFS_RMAP_UNWRITTEN))) { + error = -EFSCORRUPTED; + goto out; + } /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) { + error = -EFSCORRUPTED; + goto out; + } /* Check the offset, if necessary. */ if (XFS_RMAP_NON_INODE_OWNER(owner)) goto out; if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, - out); + if (XFS_IS_CORRUPT(mp, + !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) { + error = -EFSCORRUPTED; + goto out; + } } else { - XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + rec->rm_blockcount >= offset + len, - out); + if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) { + error = -EFSCORRUPTED; + goto out; + } + if (XFS_IS_CORRUPT(mp, + offset + len > ltoff + rec->rm_blockcount)) { + error = -EFSCORRUPTED; + goto out; + } } out: @@ -482,12 +509,18 @@ xfs_rmap_unmap( error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } error = xfs_rmap_get_rec(cur, <rec, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, cur->bc_private.a.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, @@ -502,8 +535,12 @@ xfs_rmap_unmap( * be the case that the "left" extent goes all the way to EOFS. */ if (owner == XFS_RMAP_OWN_NULL) { - XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock + - ltrec.rm_blockcount, out_error); + if (XFS_IS_CORRUPT(mp, + bno < + ltrec.rm_startblock + ltrec.rm_blockcount)) { + error = -EFSCORRUPTED; + goto out_error; + } goto out_done; } @@ -526,15 +563,22 @@ xfs_rmap_unmap( error = xfs_rmap_get_rec(cur, &rtrec, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } if (rtrec.rm_startblock >= bno + len) goto out_done; } /* Make sure the extent we found covers the entire freeing range. */ - XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); + if (XFS_IS_CORRUPT(mp, + ltrec.rm_startblock > bno || + ltrec.rm_startblock + ltrec.rm_blockcount < + bno + len)) { + error = -EFSCORRUPTED; + goto out_error; + } /* Check owner information. */ error = xfs_rmap_free_check_owner(mp, ltoff, <rec, len, owner, @@ -551,7 +595,10 @@ xfs_rmap_unmap( error = xfs_btree_delete(cur, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } else if (ltrec.rm_startblock == bno) { /* * overlap left hand side of extent: move the start, trim the @@ -743,7 +790,10 @@ xfs_rmap_map( error = xfs_rmap_get_rec(cur, <rec, &have_lt); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error); + if (XFS_IS_CORRUPT(mp, have_lt != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, cur->bc_private.a.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, @@ -753,9 +803,12 @@ xfs_rmap_map( have_lt = 0; } - XFS_WANT_CORRUPTED_GOTO(mp, - have_lt == 0 || - ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error); + if (XFS_IS_CORRUPT(mp, + have_lt != 0 && + ltrec.rm_startblock + ltrec.rm_blockcount > bno)) { + error = -EFSCORRUPTED; + goto out_error; + } /* * Increment the cursor to see if we have a right-adjacent record to our @@ -769,9 +822,14 @@ xfs_rmap_map( error = xfs_rmap_get_rec(cur, >rec, &have_gt); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock, - out_error); + if (XFS_IS_CORRUPT(mp, have_gt != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } + if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) { + error = -EFSCORRUPTED; + goto out_error; + } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, cur->bc_private.a.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, @@ -821,7 +879,10 @@ xfs_rmap_map( error = xfs_btree_delete(cur, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } /* point the cursor back to the left record and update */ @@ -865,7 +926,10 @@ xfs_rmap_map( error = xfs_btree_insert(cur, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } } trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, @@ -957,12 +1021,18 @@ xfs_rmap_convert( error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_rmap_get_rec(cur, &PREV, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, cur->bc_private.a.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, @@ -995,10 +1065,16 @@ xfs_rmap_convert( error = xfs_rmap_get_rec(cur, &LEFT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - XFS_WANT_CORRUPTED_GOTO(mp, - LEFT.rm_startblock + LEFT.rm_blockcount <= bno, - done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } + if (XFS_IS_CORRUPT(mp, + LEFT.rm_startblock + LEFT.rm_blockcount > + bno)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, cur->bc_private.a.agno, LEFT.rm_startblock, LEFT.rm_blockcount, LEFT.rm_owner, @@ -1017,7 +1093,10 @@ xfs_rmap_convert( error = xfs_btree_increment(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_increment(cur, 0, &i); if (error) goto done; @@ -1026,9 +1105,14 @@ xfs_rmap_convert( error = xfs_rmap_get_rec(cur, &RIGHT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock, - done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } + if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, cur->bc_private.a.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, @@ -1055,7 +1139,10 @@ xfs_rmap_convert( error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } /* * Switch out based on the FILLING and CONTIG state bits. @@ -1071,7 +1158,10 @@ xfs_rmap_convert( error = xfs_btree_increment(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, @@ -1079,11 +1169,17 @@ xfs_rmap_convert( error = xfs_btree_delete(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, @@ -1091,11 +1187,17 @@ xfs_rmap_convert( error = xfs_btree_delete(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW = LEFT; NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount; error = xfs_rmap_update(cur, &NEW); @@ -1115,11 +1217,17 @@ xfs_rmap_convert( error = xfs_btree_delete(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW = LEFT; NEW.rm_blockcount += PREV.rm_blockcount; error = xfs_rmap_update(cur, &NEW); @@ -1135,7 +1243,10 @@ xfs_rmap_convert( error = xfs_btree_increment(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, @@ -1143,11 +1254,17 @@ xfs_rmap_convert( error = xfs_btree_delete(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW = PREV; NEW.rm_blockcount = len + RIGHT.rm_blockcount; NEW.rm_flags = newext; @@ -1214,7 +1331,10 @@ xfs_rmap_convert( error = xfs_btree_insert(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } break; case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: @@ -1253,7 +1373,10 @@ xfs_rmap_convert( oldext, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_startblock = bno; NEW.rm_owner = owner; NEW.rm_offset = offset; @@ -1265,7 +1388,10 @@ xfs_rmap_convert( error = xfs_btree_insert(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } break; case 0: @@ -1295,7 +1421,10 @@ xfs_rmap_convert( error = xfs_btree_insert(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } /* * Reset the cursor to the position of the new extent * we are about to insert as we can't trust it after @@ -1305,7 +1434,10 @@ xfs_rmap_convert( oldext, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); + if (XFS_IS_CORRUPT(mp, i != 0)) { + error = -EFSCORRUPTED; + goto done; + } /* new middle extent - newext */ cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; cur->bc_rec.r.rm_flags |= newext; @@ -1314,7 +1446,10 @@ xfs_rmap_convert( error = xfs_btree_insert(cur, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } break; case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: @@ -1383,7 +1518,10 @@ xfs_rmap_convert_shared( &PREV, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } ASSERT(PREV.rm_offset <= offset); ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff); @@ -1406,9 +1544,12 @@ xfs_rmap_convert_shared( goto done; if (i) { state |= RMAP_LEFT_VALID; - XFS_WANT_CORRUPTED_GOTO(mp, - LEFT.rm_startblock + LEFT.rm_blockcount <= bno, - done); + if (XFS_IS_CORRUPT(mp, + LEFT.rm_startblock + LEFT.rm_blockcount > + bno)) { + error = -EFSCORRUPTED; + goto done; + } if (xfs_rmap_is_mergeable(&LEFT, owner, newext)) state |= RMAP_LEFT_CONTIG; } @@ -1423,9 +1564,14 @@ xfs_rmap_convert_shared( error = xfs_rmap_get_rec(cur, &RIGHT, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock, - done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } + if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) { + error = -EFSCORRUPTED; + goto done; + } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, cur->bc_private.a.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, @@ -1472,7 +1618,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1495,7 +1644,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount += PREV.rm_blockcount; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1518,7 +1670,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount += RIGHT.rm_blockcount; NEW.rm_flags = RIGHT.rm_flags; error = xfs_rmap_update(cur, &NEW); @@ -1538,7 +1693,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_flags = newext; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1570,7 +1728,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount += len; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1612,7 +1773,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount = offset - NEW.rm_offset; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1644,7 +1808,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount -= len; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1679,7 +1846,10 @@ xfs_rmap_convert_shared( NEW.rm_offset, NEW.rm_flags, &i); if (error) goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto done; + } NEW.rm_blockcount = offset - NEW.rm_offset; error = xfs_rmap_update(cur, &NEW); if (error) @@ -1765,25 +1935,44 @@ xfs_rmap_unmap_shared( <rec, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } ltoff = ltrec.rm_offset; /* Make sure the extent we found covers the entire freeing range. */ - XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); + if (XFS_IS_CORRUPT(mp, + ltrec.rm_startblock > bno || + ltrec.rm_startblock + ltrec.rm_blockcount < + bno + len)) { + error = -EFSCORRUPTED; + goto out_error; + } /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner, out_error); + if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) { + error = -EFSCORRUPTED; + goto out_error; + } /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); + if (XFS_IS_CORRUPT(mp, + (flags & XFS_RMAP_UNWRITTEN) != + (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) { + error = -EFSCORRUPTED; + goto out_error; + } /* Check the offset. */ - XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, offset <= ltoff + ltrec.rm_blockcount, - out_error); + if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) { + error = -EFSCORRUPTED; + goto out_error; + } + if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) { + error = -EFSCORRUPTED; + goto out_error; + } if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* Exact match, simply remove the record from rmap tree. */ @@ -1836,7 +2025,10 @@ xfs_rmap_unmap_shared( ltrec.rm_offset, ltrec.rm_flags, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } ltrec.rm_blockcount -= len; error = xfs_rmap_update(cur, <rec); if (error) @@ -1862,7 +2054,10 @@ xfs_rmap_unmap_shared( ltrec.rm_offset, ltrec.rm_flags, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } ltrec.rm_blockcount = bno - ltrec.rm_startblock; error = xfs_rmap_update(cur, <rec); if (error) @@ -1938,7 +2133,10 @@ xfs_rmap_map_shared( error = xfs_rmap_get_rec(cur, >rec, &have_gt); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error); + if (XFS_IS_CORRUPT(mp, have_gt != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, cur->bc_private.a.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, @@ -1987,7 +2185,10 @@ xfs_rmap_map_shared( ltrec.rm_offset, ltrec.rm_flags, &i); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_error; + } error = xfs_rmap_update(cur, <rec); if (error) @@ -2199,7 +2400,7 @@ xfs_rmap_finish_one( error = xfs_free_extent_fix_freelist(tp, agno, &agbp); if (error) return error; - if (!agbp) + if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) return -EFSCORRUPTED; rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 8ea1efc97b41..f42c74cb8be5 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -15,7 +15,7 @@ #include "xfs_bmap.h" #include "xfs_trans.h" #include "xfs_rtalloc.h" - +#include "xfs_error.h" /* * Realtime allocator bitmap functions shared with userspace. @@ -70,7 +70,7 @@ xfs_rtbuf_get( if (error) return error; - if (nmap == 0 || !xfs_bmap_is_real_extent(&map)) + if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_real_extent(&map))) return -EFSCORRUPTED; ASSERT(map.br_startblock != NULLFSBLOCK); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index ac6cdca63e15..0ac69751fe85 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -10,6 +10,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index a9ad90926b87..2b8ccb5b975d 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -55,7 +55,7 @@ xfs_trans_ichgtime( int flags) { struct inode *inode = VFS_I(ip); - struct timespec64 tv; + struct timespec64 tv; ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -66,10 +66,8 @@ xfs_trans_ichgtime( inode->i_mtime = tv; if (flags & XFS_ICHGTIME_CHG) inode->i_ctime = tv; - if (flags & XFS_ICHGTIME_CREATE) { - ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; - ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec; - } + if (flags & XFS_ICHGTIME_CREATE) + ip->i_d.di_crtime = tv; } /* diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index d12bbd526e7c..c55cd9a3dec9 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -718,7 +718,7 @@ xfs_calc_clear_agi_bucket_reservation( /* * Adjusting quota limits. - * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) + * the disk quota buffer: sizeof(struct xfs_disk_dquot) */ STATIC uint xfs_calc_qm_setqlim_reservation(void) @@ -742,7 +742,7 @@ xfs_calc_qm_dqalloc_reservation( /* * Turning off quotas. - * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 + * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2 * the superblock for the quota flags: sector size */ STATIC uint @@ -755,7 +755,7 @@ xfs_calc_qm_quotaoff_reservation( /* * End of turning off quotas. - * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 + * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2 */ STATIC uint xfs_calc_qm_quotaoff_end_reservation(void) diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 300b3e91ca3a..397d94775440 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -21,7 +21,6 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */ typedef int64_t xfs_lsn_t; /* log sequence number */ -typedef int32_t xfs_tid_t; /* transaction identifier */ typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef uint32_t xfs_dahash_t; /* dir/attr hash value */ @@ -33,7 +32,6 @@ typedef uint64_t xfs_fileoff_t; /* block number in a file */ typedef uint64_t xfs_filblks_t; /* number of blocks in a file */ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -typedef int64_t xfs_sfiloff_t; /* signed block number in a file */ /* * New verifiers will return the instruction address of the failing check. diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 0edc7f8eb96e..d9f0dd444b80 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -398,15 +398,14 @@ out: STATIC int xchk_xattr_rec( struct xchk_da_btree *ds, - int level, - void *rec) + int level) { struct xfs_mount *mp = ds->state->mp; - struct xfs_attr_leaf_entry *ent = rec; - struct xfs_da_state_blk *blk; + struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; struct xfs_attr_leaf_name_local *lentry; struct xfs_attr_leaf_name_remote *rentry; struct xfs_buf *bp; + struct xfs_attr_leaf_entry *ent; xfs_dahash_t calc_hash; xfs_dahash_t hash; int nameidx; @@ -414,7 +413,9 @@ xchk_xattr_rec( unsigned int badflags; int error; - blk = &ds->state->path.blk[level]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + + ent = xfs_attr3_leaf_entryp(blk->bp->b_addr) + blk->index; /* Check the whole block, if necessary. */ error = xchk_xattr_block(ds, level); diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index 3d47d111be5a..18a684e18a69 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -294,5 +294,6 @@ xfs_bitmap_set_btblocks( struct xfs_bitmap *bitmap, struct xfs_btree_cur *cur) { - return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, bitmap); + return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, + XFS_BTREE_VISIT_ALL, bitmap); } diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 003a772cd26c..2e50d146105d 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -14,8 +14,15 @@ static inline bool xchk_should_terminate( struct xfs_scrub *sc, - int *error) + int *error) { + /* + * If preemption is disabled, we need to yield to the scheduler every + * few seconds so that we don't run afoul of the soft lockup watchdog + * or RCU stall detector. + */ + cond_resched(); + if (fatal_signal_pending(current)) { if (*error == 0) *error = -EAGAIN; diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 77ff9f97bcda..97a15b6f2865 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -77,40 +77,18 @@ xchk_da_set_corrupt( __return_address); } -/* Find an entry at a certain level in a da btree. */ -STATIC void * -xchk_da_btree_entry( - struct xchk_da_btree *ds, - int level, - int rec) +static struct xfs_da_node_entry * +xchk_da_btree_node_entry( + struct xchk_da_btree *ds, + int level) { - char *ents; - struct xfs_da_state_blk *blk; - void *baddr; + struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; + struct xfs_da3_icnode_hdr hdr; - /* Dispatch the entry finding function. */ - blk = &ds->state->path.blk[level]; - baddr = blk->bp->b_addr; - switch (blk->magic) { - case XFS_ATTR_LEAF_MAGIC: - case XFS_ATTR3_LEAF_MAGIC: - ents = (char *)xfs_attr3_leaf_entryp(baddr); - return ents + (rec * sizeof(struct xfs_attr_leaf_entry)); - case XFS_DIR2_LEAFN_MAGIC: - case XFS_DIR3_LEAFN_MAGIC: - ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr); - return ents + (rec * sizeof(struct xfs_dir2_leaf_entry)); - case XFS_DIR2_LEAF1_MAGIC: - case XFS_DIR3_LEAF1_MAGIC: - ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr); - return ents + (rec * sizeof(struct xfs_dir2_leaf_entry)); - case XFS_DA_NODE_MAGIC: - case XFS_DA3_NODE_MAGIC: - ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr); - return ents + (rec * sizeof(struct xfs_da_node_entry)); - } + ASSERT(blk->magic == XFS_DA_NODE_MAGIC); - return NULL; + xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); + return hdr.btree + blk->index; } /* Scrub a da btree hash (key). */ @@ -120,7 +98,6 @@ xchk_da_btree_hash( int level, __be32 *hashp) { - struct xfs_da_state_blk *blks; struct xfs_da_node_entry *entry; xfs_dahash_t hash; xfs_dahash_t parent_hash; @@ -135,8 +112,7 @@ xchk_da_btree_hash( return 0; /* Is this hash no larger than the parent hash? */ - blks = ds->state->path.blk; - entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index); + entry = xchk_da_btree_node_entry(ds, level - 1); parent_hash = be32_to_cpu(entry->hashval); if (parent_hash < hash) xchk_da_set_corrupt(ds, level); @@ -355,8 +331,8 @@ xchk_da_btree_block( goto out_nobuf; /* Read the buffer. */ - error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2, - &blk->bp, dargs->whichfork, + error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, + XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork, &xchk_da_btree_buf_ops); if (!xchk_da_process_error(ds, level, &error)) goto out_nobuf; @@ -433,8 +409,8 @@ xchk_da_btree_block( XFS_BLFT_DA_NODE_BUF); blk->magic = XFS_DA_NODE_MAGIC; node = blk->bp->b_addr; - ip->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = ip->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node); + btree = nodehdr.btree; *pmaxrecs = nodehdr.count; blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); if (level == 0) { @@ -479,14 +455,12 @@ xchk_da_btree( struct xfs_mount *mp = sc->mp; struct xfs_da_state_blk *blks; struct xfs_da_node_entry *key; - void *rec; xfs_dablk_t blkno; int level; int error; /* Skip short format data structures; no btree to scan. */ - if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE) + if (!xfs_ifork_has_extents(sc->ip, whichfork)) return 0; /* Set up initial da state. */ @@ -538,9 +512,7 @@ xchk_da_btree( } /* Dispatch record scrubbing. */ - rec = xchk_da_btree_entry(&ds, level, - blks[level].index); - error = scrub_fn(&ds, level, rec); + error = scrub_fn(&ds, level); if (error) break; if (xchk_should_terminate(sc, &error) || @@ -562,7 +534,7 @@ xchk_da_btree( } /* Hashes in order for scrub? */ - key = xchk_da_btree_entry(&ds, level, blks[level].index); + key = xchk_da_btree_node_entry(&ds, level); error = xchk_da_btree_hash(&ds, level, &key->hashval); if (error) goto out; diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h index cb3f0003245b..1f3515c6d5a8 100644 --- a/fs/xfs/scrub/dabtree.h +++ b/fs/xfs/scrub/dabtree.h @@ -28,8 +28,7 @@ struct xchk_da_btree { int tree_level; }; -typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds, - int level, void *rec); +typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds, int level); /* Check for da btree operation errors. */ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error); diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 1e2e11721eb9..266da4e4bde6 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -113,6 +113,9 @@ xchk_dir_actor( offset = xfs_dir2_db_to_da(mp->m_dir_geo, xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos)); + if (xchk_should_terminate(sdc->sc, &error)) + return error; + /* Does this inode number make sense? */ if (!xfs_verify_dir_ino(mp, ino)) { xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -179,15 +182,17 @@ out: STATIC int xchk_dir_rec( struct xchk_da_btree *ds, - int level, - void *rec) + int level) { + struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; struct xfs_mount *mp = ds->state->mp; - struct xfs_dir2_leaf_entry *ent = rec; struct xfs_inode *dp = ds->dargs.dp; + struct xfs_da_geometry *geo = mp->m_dir_geo; struct xfs_dir2_data_entry *dent; struct xfs_buf *bp; - char *p, *endp; + struct xfs_dir2_leaf_entry *ent; + unsigned int end; + unsigned int iter_off; xfs_ino_t ino; xfs_dablk_t rec_bno; xfs_dir2_db_t db; @@ -195,9 +200,16 @@ xchk_dir_rec( xfs_dir2_dataptr_t ptr; xfs_dahash_t calc_hash; xfs_dahash_t hash; + struct xfs_dir3_icleaf_hdr hdr; unsigned int tag; int error; + ASSERT(blk->magic == XFS_DIR2_LEAF1_MAGIC || + blk->magic == XFS_DIR2_LEAFN_MAGIC); + + xfs_dir2_leaf_hdr_from_disk(mp, &hdr, blk->bp->b_addr); + ent = hdr.ents + blk->index; + /* Check the hash of the entry. */ error = xchk_da_btree_hash(ds, level, &ent->hashval); if (error) @@ -209,15 +221,16 @@ xchk_dir_rec( return 0; /* Find the directory entry's location. */ - db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr); - off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr); - rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db); + db = xfs_dir2_dataptr_to_db(geo, ptr); + off = xfs_dir2_dataptr_to_off(geo, ptr); + rec_bno = xfs_dir2_db_to_da(geo, db); - if (rec_bno >= mp->m_dir_geo->leafblk) { + if (rec_bno >= geo->leafblk) { xchk_da_set_corrupt(ds, level); goto out; } - error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp); + error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, + XFS_DABUF_MAP_HOLE_OK, &bp); if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno, &error)) goto out; @@ -230,38 +243,37 @@ xchk_dir_rec( if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out_relse; - dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); + dent = bp->b_addr + off; /* Make sure we got a real directory entry. */ - p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); - endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); - if (!endp) { + iter_off = geo->data_entry_offset; + end = xfs_dir3_data_end_offset(geo, bp->b_addr); + if (!end) { xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out_relse; } - while (p < endp) { - struct xfs_dir2_data_entry *dep; - struct xfs_dir2_data_unused *dup; + for (;;) { + struct xfs_dir2_data_entry *dep = bp->b_addr + iter_off; + struct xfs_dir2_data_unused *dup = bp->b_addr + iter_off; + + if (iter_off >= end) { + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } - dup = (struct xfs_dir2_data_unused *)p; if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - p += be16_to_cpu(dup->length); + iter_off += be16_to_cpu(dup->length); continue; } - dep = (struct xfs_dir2_data_entry *)p; if (dep == dent) break; - p += mp->m_dir_inode_ops->data_entsize(dep->namelen); - } - if (p >= endp) { - xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); - goto out_relse; + iter_off += xfs_dir2_data_entsize(mp, dep->namelen); } /* Retrieve the entry, sanity check it, and compare hashes. */ ino = be64_to_cpu(dent->inumber); hash = be32_to_cpu(ent->hashval); - tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); + tag = be16_to_cpup(xfs_dir2_data_entry_tag_p(mp, dent)); if (!xfs_verify_dir_ino(mp, ino) || tag != off) xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); if (dent->namelen == 0) { @@ -319,19 +331,15 @@ xchk_directory_data_bestfree( struct xfs_buf *bp; struct xfs_dir2_data_free *bf; struct xfs_mount *mp = sc->mp; - const struct xfs_dir_ops *d_ops; - char *ptr; - char *endptr; u16 tag; unsigned int nr_bestfrees = 0; unsigned int nr_frees = 0; unsigned int smallest_bestfree; int newlen; - int offset; + unsigned int offset; + unsigned int end; int error; - d_ops = sc->ip->d_ops; - if (is_block) { /* dir block format */ if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET)) @@ -339,7 +347,7 @@ xchk_directory_data_bestfree( error = xfs_dir3_block_read(sc->tp, sc->ip, &bp); } else { /* dir data format */ - error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp); + error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, 0, &bp); } if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; @@ -351,7 +359,7 @@ xchk_directory_data_bestfree( goto out_buf; /* Do the bestfrees correspond to actual free space? */ - bf = d_ops->data_bestfree_p(bp->b_addr); + bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr); smallest_bestfree = UINT_MAX; for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { offset = be16_to_cpu(dfp->offset); @@ -361,13 +369,13 @@ xchk_directory_data_bestfree( xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } - dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset); + dup = bp->b_addr + offset; tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); /* bestfree doesn't match the entry it points at? */ if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) || be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) || - tag != ((char *)dup - (char *)bp->b_addr)) { + tag != offset) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } @@ -383,30 +391,30 @@ xchk_directory_data_bestfree( } /* Make sure the bestfrees are actually the best free spaces. */ - ptr = (char *)d_ops->data_entry_p(bp->b_addr); - endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); + offset = mp->m_dir_geo->data_entry_offset; + end = xfs_dir3_data_end_offset(mp->m_dir_geo, bp->b_addr); /* Iterate the entries, stopping when we hit or go past the end. */ - while (ptr < endptr) { - dup = (struct xfs_dir2_data_unused *)ptr; + while (offset < end) { + dup = bp->b_addr + offset; + /* Skip real entries */ if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) { - struct xfs_dir2_data_entry *dep; + struct xfs_dir2_data_entry *dep = bp->b_addr + offset; - dep = (struct xfs_dir2_data_entry *)ptr; - newlen = d_ops->data_entsize(dep->namelen); + newlen = xfs_dir2_data_entsize(mp, dep->namelen); if (newlen <= 0) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } - ptr += newlen; + offset += newlen; continue; } /* Spot check this free entry */ tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); - if (tag != ((char *)dup - (char *)bp->b_addr)) { + if (tag != offset) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } @@ -425,13 +433,13 @@ xchk_directory_data_bestfree( xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } - ptr += newlen; - if (ptr <= endptr) + offset += newlen; + if (offset <= end) nr_frees++; } /* We're required to fill all the space. */ - if (ptr != endptr) + if (offset != end) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); /* Did we see at least as many free slots as there are bestfrees? */ @@ -458,7 +466,7 @@ xchk_directory_check_freesp( { struct xfs_dir2_data_free *dfp; - dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr); + dfp = xfs_dir2_data_bestfree_p(sc->mp, dbp->b_addr); if (len != be16_to_cpu(dfp->length)) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); @@ -475,12 +483,10 @@ xchk_directory_leaf1_bestfree( xfs_dablk_t lblk) { struct xfs_dir3_icleaf_hdr leafhdr; - struct xfs_dir2_leaf_entry *ents; struct xfs_dir2_leaf_tail *ltp; struct xfs_dir2_leaf *leaf; struct xfs_buf *dbp; struct xfs_buf *bp; - const struct xfs_dir_ops *d_ops = sc->ip->d_ops; struct xfs_da_geometry *geo = sc->mp->m_dir_geo; __be16 *bestp; __u16 best; @@ -492,14 +498,13 @@ xchk_directory_leaf1_bestfree( int error; /* Read the free space block. */ - error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); + error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, &bp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; xchk_buffer_recheck(sc, bp); leaf = bp->b_addr; - d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = d_ops->leaf_ents_p(leaf); + xfs_dir2_leaf_hdr_from_disk(sc->ip->i_mount, &leafhdr, leaf); ltp = xfs_dir2_leaf_tail_p(geo, leaf); bestcount = be32_to_cpu(ltp->bestcount); bestp = xfs_dir2_leaf_bests_p(ltp); @@ -521,24 +526,25 @@ xchk_directory_leaf1_bestfree( } /* Is the leaf count even remotely sane? */ - if (leafhdr.count > d_ops->leaf_max_ents(geo)) { + if (leafhdr.count > geo->leaf_max_ents) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } /* Leaves and bests don't overlap in leaf format. */ - if ((char *)&ents[leafhdr.count] > (char *)bestp) { + if ((char *)&leafhdr.ents[leafhdr.count] > (char *)bestp) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } /* Check hash value order, count stale entries. */ for (i = 0; i < leafhdr.count; i++) { - hash = be32_to_cpu(ents[i].hashval); + hash = be32_to_cpu(leafhdr.ents[i].hashval); if (i > 0 && lasthash > hash) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); lasthash = hash; - if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + if (leafhdr.ents[i].address == + cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (leafhdr.stale != stale) @@ -552,7 +558,7 @@ xchk_directory_leaf1_bestfree( if (best == NULLDATAOFF) continue; error = xfs_dir3_data_read(sc->tp, sc->ip, - i * args->geo->fsbcount, -1, &dbp); + i * args->geo->fsbcount, 0, &dbp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) break; @@ -575,7 +581,6 @@ xchk_directory_free_bestfree( struct xfs_dir3_icfree_hdr freehdr; struct xfs_buf *dbp; struct xfs_buf *bp; - __be16 *bestp; __u16 best; unsigned int stale = 0; int i; @@ -595,17 +600,16 @@ xchk_directory_free_bestfree( } /* Check all the entries. */ - sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr); - bestp = sc->ip->d_ops->free_bests_p(bp->b_addr); - for (i = 0; i < freehdr.nvalid; i++, bestp++) { - best = be16_to_cpu(*bestp); + xfs_dir2_free_hdr_from_disk(sc->ip->i_mount, &freehdr, bp->b_addr); + for (i = 0; i < freehdr.nvalid; i++) { + best = be16_to_cpu(freehdr.bests[i]); if (best == NULLDATAOFF) { stale++; continue; } error = xfs_dir3_data_read(sc->tp, sc->ip, (freehdr.firstdb + i) * args->geo->fsbcount, - -1, &dbp); + 0, &dbp); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) break; diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 98f82d7c8b40..7251c66a82c9 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -104,7 +104,7 @@ next_loop_perag: pag = NULL; error = 0; - if (fatal_signal_pending(current)) + if (xchk_should_terminate(sc, &error)) break; } @@ -163,6 +163,7 @@ xchk_fscount_aggregate_agcounts( uint64_t delayed; xfs_agnumber_t agno; int tries = 8; + int error = 0; retry: fsc->icount = 0; @@ -196,10 +197,13 @@ retry: xfs_perag_put(pag); - if (fatal_signal_pending(current)) + if (xchk_should_terminate(sc, &error)) break; } + if (error) + return error; + /* * The global incore space reservation is taken from the incore * counters, so leave that out of the computation. diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index b2f602811e9d..83d27cdf579b 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -11,6 +11,7 @@ #include "xfs_sb.h" #include "xfs_health.h" #include "scrub/scrub.h" +#include "scrub/health.h" /* * Scrub and In-Core Filesystem Health Assessments diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index c962bd534690..5705adc43a75 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -32,8 +32,10 @@ xchk_setup_parent( struct xchk_parent_ctx { struct dir_context dc; + struct xfs_scrub *sc; xfs_ino_t ino; xfs_nlink_t nlink; + bool cancelled; }; /* Look for a single entry in a directory pointing to an inode. */ @@ -47,11 +49,21 @@ xchk_parent_actor( unsigned type) { struct xchk_parent_ctx *spc; + int error = 0; spc = container_of(dc, struct xchk_parent_ctx, dc); if (spc->ino == ino) spc->nlink++; - return 0; + + /* + * If we're facing a fatal signal, bail out. Store the cancellation + * status separately because the VFS readdir code squashes error codes + * into short directory reads. + */ + if (xchk_should_terminate(spc->sc, &error)) + spc->cancelled = true; + + return error; } /* Count the number of dentries in the parent dir that point to this inode. */ @@ -62,10 +74,9 @@ xchk_parent_count_parent_dentries( xfs_nlink_t *nlink) { struct xchk_parent_ctx spc = { - .dc.actor = xchk_parent_actor, - .dc.pos = 0, - .ino = sc->ip->i_ino, - .nlink = 0, + .dc.actor = xchk_parent_actor, + .ino = sc->ip->i_ino, + .sc = sc, }; size_t bufsize; loff_t oldpos; @@ -80,7 +91,7 @@ xchk_parent_count_parent_dentries( */ lock_mode = xfs_ilock_data_map_shared(parent); if (parent->i_d.di_nextents > 0) - error = xfs_dir3_data_readahead(parent, 0, -1); + error = xfs_dir3_data_readahead(parent, 0, 0); xfs_iunlock(parent, lock_mode); if (error) return error; @@ -97,6 +108,10 @@ xchk_parent_count_parent_dentries( error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); if (error) goto out; + if (spc.cancelled) { + error = -EAGAIN; + goto out; + } if (oldpos == spc.dc.pos) break; oldpos = spc.dc.pos; diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 0a33b4421c32..905a34558361 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -93,6 +93,10 @@ xchk_quota_item( unsigned long long rcount; xfs_ino_t fs_icount; xfs_dqid_t id = be32_to_cpu(d->d_id); + int error = 0; + + if (xchk_should_terminate(sc, &error)) + return error; /* * Except for the root dquot, the actual dquot we got must either have @@ -178,6 +182,9 @@ xchk_quota_item( if (id != 0 && rhard != 0 && rcount > rhard) xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return -EFSCORRUPTED; + return 0; } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 15c8c5f3f688..f1775bb19313 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -16,6 +16,7 @@ #include "xfs_qm.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 96d7071cfa46..91693fce34a8 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -12,8 +12,10 @@ #include "xfs_inode.h" #include "xfs_attr.h" #include "xfs_trace.h" -#include <linux/posix_acl_xattr.h> +#include "xfs_error.h" +#include "xfs_acl.h" +#include <linux/posix_acl_xattr.h> /* * Locking scheme: @@ -23,6 +25,7 @@ STATIC struct posix_acl * xfs_acl_from_disk( + struct xfs_mount *mp, const struct xfs_acl *aclp, int len, int max_entries) @@ -32,11 +35,18 @@ xfs_acl_from_disk( const struct xfs_acl_entry *ace; unsigned int count, i; - if (len < sizeof(*aclp)) + if (len < sizeof(*aclp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp, + len); return ERR_PTR(-EFSCORRUPTED); + } + count = be32_to_cpu(aclp->acl_cnt); - if (count > max_entries || XFS_ACL_SIZE(count) != len) + if (count > max_entries || XFS_ACL_SIZE(count) != len) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp, + len); return ERR_PTR(-EFSCORRUPTED); + } acl = posix_acl_alloc(count, GFP_KERNEL); if (!acl) @@ -145,7 +155,7 @@ xfs_get_acl(struct inode *inode, int type) if (error != -ENOATTR) acl = ERR_PTR(error); } else { - acl = xfs_acl_from_disk(xfs_acl, len, + acl = xfs_acl_from_disk(ip->i_mount, xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); kmem_free(xfs_acl); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5936507c6f50..3a688eb5c5ae 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -30,32 +30,6 @@ XFS_WPC(struct iomap_writepage_ctx *ctx) return container_of(ctx, struct xfs_writepage_ctx, ctx); } -struct block_device * -xfs_find_bdev_for_inode( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - - if (XFS_IS_REALTIME_INODE(ip)) - return mp->m_rtdev_targp->bt_bdev; - else - return mp->m_ddev_targp->bt_bdev; -} - -struct dax_device * -xfs_find_daxdev_for_inode( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - - if (XFS_IS_REALTIME_INODE(ip)) - return mp->m_rtdev_targp->bt_daxdev; - else - return mp->m_ddev_targp->bt_daxdev; -} - /* * Fast and loose check if this write could update the on-disk inode size. */ @@ -609,9 +583,11 @@ xfs_dax_writepages( struct address_space *mapping, struct writeback_control *wbc) { - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + struct xfs_inode *ip = XFS_I(mapping->host); + + xfs_iflags_clear(ip, XFS_ITRUNCATED); return dax_writeback_mapping_range(mapping, - xfs_find_bdev_for_inode(mapping->host), wbc); + xfs_inode_buftarg(ip)->bt_bdev, wbc); } STATIC sector_t @@ -634,7 +610,7 @@ xfs_vm_bmap( */ if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip)) return 0; - return iomap_bmap(mapping, block, &xfs_iomap_ops); + return iomap_bmap(mapping, block, &xfs_read_iomap_ops); } STATIC int @@ -642,7 +618,7 @@ xfs_vm_readpage( struct file *unused, struct page *page) { - return iomap_readpage(page, &xfs_iomap_ops); + return iomap_readpage(page, &xfs_read_iomap_ops); } STATIC int @@ -652,7 +628,7 @@ xfs_vm_readpages( struct list_head *pages, unsigned nr_pages) { - return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops); + return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops); } static int @@ -661,8 +637,9 @@ xfs_iomap_swapfile_activate( struct file *swap_file, sector_t *span) { - sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file)); - return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops); + sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + return iomap_swapfile_activate(sis, swap_file, span, + &xfs_read_iomap_ops); } const struct address_space_operations xfs_address_space_operations = { diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 687b11f34fa2..e0bd68419764 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -11,7 +11,4 @@ extern const struct address_space_operations xfs_dax_aops; int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); -extern struct block_device *xfs_find_bdev_for_inode(struct inode *); -extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); - #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index a640a285cc52..5ff49523d8ea 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -22,6 +22,7 @@ #include "xfs_attr_leaf.h" #include "xfs_quota.h" #include "xfs_dir2.h" +#include "xfs_error.h" /* * Look at all the extents for this logical region, @@ -190,37 +191,35 @@ xfs_attr3_leaf_inactive( */ STATIC int xfs_attr3_node_inactive( - struct xfs_trans **trans, - struct xfs_inode *dp, - struct xfs_buf *bp, - int level) + struct xfs_trans **trans, + struct xfs_inode *dp, + struct xfs_buf *bp, + int level) { - xfs_da_blkinfo_t *info; - xfs_da_intnode_t *node; - xfs_dablk_t child_fsb; - xfs_daddr_t parent_blkno, child_blkno; - int error, i; - struct xfs_buf *child_bp; - struct xfs_da_node_entry *btree; + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_blkinfo *info; + xfs_dablk_t child_fsb; + xfs_daddr_t parent_blkno, child_blkno; + struct xfs_buf *child_bp; struct xfs_da3_icnode_hdr ichdr; + int error, i; /* * Since this code is recursive (gasp!) we must protect ourselves. */ if (level > XFS_DA_NODE_MAXDEPTH) { xfs_trans_brelse(*trans, bp); /* no locks for later trans */ - return -EIO; + xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; } - node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&ichdr, node); + xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); parent_blkno = bp->b_bn; if (!ichdr.count) { xfs_trans_brelse(*trans, bp); return 0; } - btree = dp->d_ops->node_tree_p(node); - child_fsb = be32_to_cpu(btree[0].before); + child_fsb = be32_to_cpu(ichdr.btree[0].before); xfs_trans_brelse(*trans, bp); /* no locks for later trans */ /* @@ -235,7 +234,7 @@ xfs_attr3_node_inactive( * traversal of the tree so we may deal with many blocks * before we come back to this one. */ - error = xfs_da3_node_read(*trans, dp, child_fsb, -1, &child_bp, + error = xfs_da3_node_read(*trans, dp, child_fsb, &child_bp, XFS_ATTR_FORK); if (error) return error; @@ -258,8 +257,9 @@ xfs_attr3_node_inactive( error = xfs_attr3_leaf_inactive(trans, dp, child_bp); break; default: - error = -EIO; + xfs_buf_corruption_error(child_bp); xfs_trans_brelse(*trans, child_bp); + error = -EFSCORRUPTED; break; } if (error) @@ -268,10 +268,16 @@ xfs_attr3_node_inactive( /* * Remove the subsidiary block from the cache and from the log. */ - error = xfs_da_get_buf(*trans, dp, 0, child_blkno, &child_bp, - XFS_ATTR_FORK); - if (error) + child_bp = xfs_trans_get_buf(*trans, mp->m_ddev_targp, + child_blkno, + XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0); + if (!child_bp) + return -EIO; + error = bp->b_error; + if (error) { + xfs_trans_brelse(*trans, child_bp); return error; + } xfs_trans_binval(*trans, child_bp); /* @@ -279,13 +285,15 @@ xfs_attr3_node_inactive( * child block number. */ if (i + 1 < ichdr.count) { - error = xfs_da3_node_read(*trans, dp, 0, parent_blkno, - &bp, XFS_ATTR_FORK); + struct xfs_da3_icnode_hdr phdr; + + error = xfs_da3_node_read_mapped(*trans, dp, + parent_blkno, &bp, XFS_ATTR_FORK); if (error) return error; - node = bp->b_addr; - btree = dp->d_ops->node_tree_p(node); - child_fsb = be32_to_cpu(btree[i + 1].before); + xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr, + bp->b_addr); + child_fsb = be32_to_cpu(phdr.btree[i + 1].before); xfs_trans_brelse(*trans, bp); } /* @@ -310,6 +318,7 @@ xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { + struct xfs_mount *mp = dp->i_mount; struct xfs_da_blkinfo *info; struct xfs_buf *bp; xfs_daddr_t blkno; @@ -321,7 +330,7 @@ xfs_attr3_root_inactive( * the extents in reverse order the extent containing * block 0 must still be there. */ - error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); + error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; blkno = bp->b_bn; @@ -341,7 +350,8 @@ xfs_attr3_root_inactive( error = xfs_attr3_leaf_inactive(trans, dp, bp); break; default: - error = -EIO; + error = -EFSCORRUPTED; + xfs_buf_corruption_error(bp); xfs_trans_brelse(*trans, bp); break; } @@ -351,9 +361,15 @@ xfs_attr3_root_inactive( /* * Invalidate the incore copy of the root block. */ - error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK); - if (error) + bp = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, + XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0); + if (!bp) + return -EIO; + error = bp->b_error; + if (error) { + xfs_trans_brelse(*trans, bp); return error; + } xfs_trans_binval(*trans, bp); /* remove from cache */ /* * Commit the invalidate and start the next transaction. diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 00758fdc2fec..d37743bdf274 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -49,14 +49,16 @@ xfs_attr_shortform_compare(const void *a, const void *b) * we can begin returning them to the user. */ static int -xfs_attr_shortform_list(xfs_attr_list_context_t *context) +xfs_attr_shortform_list( + struct xfs_attr_list_context *context) { - attrlist_cursor_kern_t *cursor; - xfs_attr_sf_sort_t *sbuf, *sbp; - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - xfs_inode_t *dp; - int sbsize, nsbuf, count, i; + struct attrlist_cursor_kern *cursor; + struct xfs_attr_sf_sort *sbuf, *sbp; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + struct xfs_inode *dp; + int sbsize, nsbuf, count, i; + int error = 0; ASSERT(context != NULL); dp = context->dp; @@ -84,6 +86,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) (XFS_ISRESET_CURSOR(cursor) && (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { + if (XFS_IS_CORRUPT(context->dp->i_mount, + !xfs_attr_namecheck(sfe->nameval, + sfe->namelen))) + return -EFSCORRUPTED; context->put_listent(context, sfe->flags, sfe->nameval, @@ -161,10 +167,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) break; } } - if (i == nsbuf) { - kmem_free(sbuf); - return 0; - } + if (i == nsbuf) + goto out; /* * Loop putting entries into the user buffer. @@ -174,6 +178,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) cursor->hashval = sbp->hash; cursor->offset = 0; } + if (XFS_IS_CORRUPT(context->dp->i_mount, + !xfs_attr_namecheck(sbp->name, + sbp->namelen))) { + error = -EFSCORRUPTED; + goto out; + } context->put_listent(context, sbp->flags, sbp->name, @@ -183,9 +193,9 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) break; cursor->offset++; } - +out: kmem_free(sbuf); - return 0; + return error; } /* @@ -213,7 +223,7 @@ xfs_attr_node_list_lookup( ASSERT(*pbp == NULL); cursor->blkno = 0; for (;;) { - error = xfs_da3_node_read(tp, dp, cursor->blkno, -1, &bp, + error = xfs_da3_node_read(tp, dp, cursor->blkno, &bp, XFS_ATTR_FORK); if (error) return error; @@ -229,7 +239,7 @@ xfs_attr_node_list_lookup( goto out_corruptbuf; } - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(mp, &nodehdr, node); /* Tree taller than we can handle; bail out! */ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) @@ -243,7 +253,7 @@ xfs_attr_node_list_lookup( else expected_level--; - btree = dp->d_ops->node_tree_p(node); + btree = nodehdr.btree; for (i = 0; i < nodehdr.count; btree++, i++) { if (cursor->hashval <= be32_to_cpu(btree->hashval)) { cursor->blkno = be32_to_cpu(btree->before); @@ -258,7 +268,7 @@ xfs_attr_node_list_lookup( return 0; /* We can't point back to the root. */ - if (cursor->blkno == 0) + if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) return -EFSCORRUPTED; } @@ -269,6 +279,7 @@ xfs_attr_node_list_lookup( return 0; out_corruptbuf: + xfs_buf_corruption_error(bp); xfs_trans_brelse(tp, bp); return -EFSCORRUPTED; } @@ -284,7 +295,7 @@ xfs_attr_node_list( struct xfs_buf *bp; struct xfs_inode *dp = context->dp; struct xfs_mount *mp = dp->i_mount; - int error; + int error = 0; trace_xfs_attr_node_list(context); @@ -298,8 +309,8 @@ xfs_attr_node_list( */ bp = NULL; if (cursor->blkno > 0) { - error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1, - &bp, XFS_ATTR_FORK); + error = xfs_da3_node_read(context->tp, dp, cursor->blkno, &bp, + XFS_ATTR_FORK); if ((error != 0) && (error != -EFSCORRUPTED)) return error; if (bp) { @@ -358,24 +369,27 @@ xfs_attr_node_list( */ for (;;) { leaf = bp->b_addr; - xfs_attr3_leaf_list_int(bp, context); + error = xfs_attr3_leaf_list_int(bp, context); + if (error) + break; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); if (context->seen_enough || leafhdr.forw == 0) break; cursor->blkno = leafhdr.forw; xfs_trans_brelse(context->tp, bp); - error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp); + error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, + &bp); if (error) return error; } xfs_trans_brelse(context->tp, bp); - return 0; + return error; } /* * Copy out attribute list entries for attr_list(), for leaf attribute lists. */ -void +int xfs_attr3_leaf_list_int( struct xfs_buf *bp, struct xfs_attr_list_context *context) @@ -417,7 +431,7 @@ xfs_attr3_leaf_list_int( } if (i == ichdr.count) { trace_xfs_attr_list_notfound(context); - return; + return 0; } } else { entry = &entries[0]; @@ -457,6 +471,9 @@ xfs_attr3_leaf_list_int( valuelen = be32_to_cpu(name_rmt->valuelen); } + if (XFS_IS_CORRUPT(context->dp->i_mount, + !xfs_attr_namecheck(name, namelen))) + return -EFSCORRUPTED; context->put_listent(context, entry->flags, name, namelen, valuelen); if (context->seen_enough) @@ -464,7 +481,7 @@ xfs_attr3_leaf_list_int( cursor->offset++; } trace_xfs_attr_list_leaf_end(context); - return; + return 0; } /* @@ -479,13 +496,13 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) trace_xfs_attr_leaf_list(context); context->cursor->blkno = 0; - error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp); + error = xfs_attr3_leaf_read(context->tp, context->dp, 0, &bp); if (error) return error; - xfs_attr3_leaf_list_int(bp, context); + error = xfs_attr3_leaf_list_int(bp, context); xfs_trans_brelse(context->tp, bp); - return 0; + return error; } int diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 83d24e983d4c..ee6f4229cebc 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -21,7 +21,7 @@ #include "xfs_icache.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" - +#include "xfs_error.h" kmem_zone_t *xfs_bui_zone; kmem_zone_t *xfs_bud_zone; @@ -35,7 +35,7 @@ void xfs_bui_item_free( struct xfs_bui_log_item *buip) { - kmem_zone_free(xfs_bui_zone, buip); + kmem_cache_free(xfs_bui_zone, buip); } /* @@ -201,7 +201,7 @@ xfs_bud_item_release( struct xfs_bud_log_item *budp = BUD_ITEM(lip); xfs_bui_release(budp->bud_buip); - kmem_zone_free(xfs_bud_zone, budp); + kmem_cache_free(xfs_bud_zone, budp); } static const struct xfs_item_ops xfs_bud_item_ops = { @@ -456,7 +456,7 @@ xfs_bui_recover( if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); xfs_bui_release(buip); - return -EIO; + return -EFSCORRUPTED; } /* @@ -490,7 +490,7 @@ xfs_bui_recover( */ set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); xfs_bui_release(buip); - return -EIO; + return -EFSCORRUPTED; } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, @@ -525,6 +525,7 @@ xfs_bui_recover( type = bui_type; break; default: + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); error = -EFSCORRUPTED; goto err_inode; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 4f443703065e..2efd78a9719e 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -53,15 +53,16 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) */ int xfs_zero_extent( - struct xfs_inode *ip, - xfs_fsblock_t start_fsb, - xfs_off_t count_fsb) + struct xfs_inode *ip, + xfs_fsblock_t start_fsb, + xfs_off_t count_fsb) { - struct xfs_mount *mp = ip->i_mount; - xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); - sector_t block = XFS_BB_TO_FSBT(mp, sector); + struct xfs_mount *mp = ip->i_mount; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); + sector_t block = XFS_BB_TO_FSBT(mp, sector); - return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), + return blkdev_issue_zeroout(target->bt_bdev, block << (mp->m_super->s_blocksize_bits - 9), count_fsb << (mp->m_super->s_blocksize_bits - 9), GFP_NOFS, 0); @@ -164,13 +165,6 @@ xfs_bmap_rtalloc( xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); - - /* Zero the extent if we were asked to do so */ - if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) { - error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); - if (error) - return error; - } } else { ap->length = 0; } @@ -179,29 +173,6 @@ xfs_bmap_rtalloc( #endif /* CONFIG_XFS_RT */ /* - * Check if the endoff is outside the last extent. If so the caller will grow - * the allocation to a stripe unit boundary. All offsets are considered outside - * the end of file for an empty fork, so 1 is returned in *eof in that case. - */ -int -xfs_bmap_eof( - struct xfs_inode *ip, - xfs_fileoff_t endoff, - int whichfork, - int *eof) -{ - struct xfs_bmbt_irec rec; - int error; - - error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); - if (error || *eof) - return error; - - *eof = endoff >= rec.br_startoff + rec.br_blockcount; - return 0; -} - -/* * Extent tree block counting routines. */ @@ -229,106 +200,6 @@ xfs_bmap_count_leaves( } /* - * Count leaf blocks given a range of extent records originally - * in btree format. - */ -STATIC void -xfs_bmap_disk_count_leaves( - struct xfs_mount *mp, - struct xfs_btree_block *block, - int numrecs, - xfs_filblks_t *count) -{ - int b; - xfs_bmbt_rec_t *frp; - - for (b = 1; b <= numrecs; b++) { - frp = XFS_BMBT_REC_ADDR(mp, block, b); - *count += xfs_bmbt_disk_get_blockcount(frp); - } -} - -/* - * Recursively walks each level of a btree - * to count total fsblocks in use. - */ -STATIC int -xfs_bmap_count_tree( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_ifork *ifp, - xfs_fsblock_t blockno, - int levelin, - xfs_extnum_t *nextents, - xfs_filblks_t *count) -{ - int error; - struct xfs_buf *bp, *nbp; - int level = levelin; - __be64 *pp; - xfs_fsblock_t bno = blockno; - xfs_fsblock_t nextbno; - struct xfs_btree_block *block, *nextblock; - int numrecs; - - error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - *count += 1; - block = XFS_BUF_TO_BLOCK(bp); - - if (--level) { - /* Not at node above leaves, count this level of nodes */ - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - while (nextbno != NULLFSBLOCK) { - error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - *count += 1; - nextblock = XFS_BUF_TO_BLOCK(nbp); - nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); - xfs_trans_brelse(tp, nbp); - } - - /* Dive to the next level */ - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); - bno = be64_to_cpu(*pp); - error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents, - count); - if (error) { - xfs_trans_brelse(tp, bp); - XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", - XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; - } - xfs_trans_brelse(tp, bp); - } else { - /* count all level 1 nodes and their leaves */ - for (;;) { - nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); - numrecs = be16_to_cpu(block->bb_numrecs); - (*nextents) += numrecs; - xfs_bmap_disk_count_leaves(mp, block, numrecs, count); - xfs_trans_brelse(tp, bp); - if (nextbno == NULLFSBLOCK) - break; - bno = nextbno; - error = xfs_btree_read_bufl(mp, tp, bno, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); - if (error) - return error; - *count += 1; - block = XFS_BUF_TO_BLOCK(bp); - } - } - return 0; -} - -/* * Count fsblocks of the given fork. Delayed allocation extents are * not counted towards the totals. */ @@ -340,26 +211,19 @@ xfs_bmap_count_blocks( xfs_extnum_t *nextents, xfs_filblks_t *count) { - struct xfs_mount *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ - struct xfs_btree_block *block; /* current btree block */ - struct xfs_ifork *ifp; /* fork structure */ - xfs_fsblock_t bno; /* block # of "block" */ - int level; /* btree level, for checking */ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur; + xfs_extlen_t btblocks = 0; int error; - bno = NULLFSBLOCK; - mp = ip->i_mount; *nextents = 0; *count = 0; - ifp = XFS_IFORK_PTR(ip, whichfork); + if (!ifp) return 0; switch (XFS_IFORK_FORMAT(ip, whichfork)) { - case XFS_DINODE_FMT_EXTENTS: - *nextents = xfs_bmap_count_leaves(ifp, count); - return 0; case XFS_DINODE_FMT_BTREE: if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(tp, ip, whichfork); @@ -367,26 +231,23 @@ xfs_bmap_count_blocks( return error; } + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + error = xfs_btree_count_blocks(cur, &btblocks); + xfs_btree_del_cursor(cur, error); + if (error) + return error; + /* - * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. + * xfs_btree_count_blocks includes the root block contained in + * the inode fork in @btblocks, so subtract one because we're + * only interested in allocated disk blocks. */ - block = ifp->if_broot; - level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); - bno = be64_to_cpu(*pp); - ASSERT(bno != NULLFSBLOCK); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); - - error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, - nextents, count); - if (error) { - XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", - XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; - } - return 0; + *count += btblocks - 1; + + /* fall through */ + case XFS_DINODE_FMT_EXTENTS: + *nextents = xfs_bmap_count_leaves(ifp, count); + break; } return 0; @@ -964,8 +825,8 @@ xfs_alloc_file_space( xfs_trans_ijoin(tp, ip, 0); error = xfs_bmapi_write(tp, ip, startoffset_fsb, - allocatesize_fsb, alloc_type, resblks, - imapp, &nimaps); + allocatesize_fsb, alloc_type, 0, imapp, + &nimaps); if (error) goto error0; @@ -1039,6 +900,7 @@ out_trans_cancel: goto out_unlock; } +/* Caller must first wait for the completion of any pending DIOs if required. */ int xfs_flush_unmap_range( struct xfs_inode *ip, @@ -1050,9 +912,6 @@ xfs_flush_unmap_range( xfs_off_t rounding, start, end; int error; - /* wait for the completion of any pending DIOs */ - inode_dio_wait(inode); - rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); start = round_down(offset, rounding); end = round_up(offset + len, rounding) - 1; @@ -1084,10 +943,6 @@ xfs_free_file_space( if (len <= 0) /* if nothing being freed */ return 0; - error = xfs_flush_unmap_range(ip, offset, len); - if (error) - return error; - startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); @@ -1113,7 +968,8 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); + error = iomap_zero_range(VFS_I(ip), offset, len, NULL, + &xfs_buffered_write_iomap_ops); if (error) return error; @@ -1131,43 +987,6 @@ xfs_free_file_space( return error; } -/* - * Preallocate and zero a range of a file. This mechanism has the allocation - * semantics of fallocate and in addition converts data in the range to zeroes. - */ -int -xfs_zero_file_space( - struct xfs_inode *ip, - xfs_off_t offset, - xfs_off_t len) -{ - struct xfs_mount *mp = ip->i_mount; - uint blksize; - int error; - - trace_xfs_zero_file_space(ip); - - blksize = 1 << mp->m_sb.sb_blocklog; - - /* - * Punch a hole and prealloc the range. We use hole punch rather than - * unwritten extent conversion for two reasons: - * - * 1.) Hole punch handles partial block zeroing for us. - * - * 2.) If prealloc returns ENOSPC, the file range is still zero-valued - * by virtue of the hole punch. - */ - error = xfs_free_file_space(ip, offset, len); - if (error || xfs_is_always_cow_inode(ip)) - return error; - - return xfs_alloc_file_space(ip, round_down(offset, blksize), - round_up(offset + len, blksize) - - round_down(offset, blksize), - XFS_BMAPI_PREALLOC); -} - static int xfs_prepare_shift( struct xfs_inode *ip, @@ -1750,6 +1569,14 @@ xfs_swap_extents( goto out_unlock; } + error = xfs_qm_dqattach(ip); + if (error) + goto out_unlock; + + error = xfs_qm_dqattach(tip); + if (error) + goto out_unlock; + error = xfs_swap_extent_flush(ip); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 7a78229cf1a7..9f993168b55b 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -30,8 +30,6 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) } #endif /* CONFIG_XFS_RT */ -int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, - int whichfork, int *eof); int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, xfs_fileoff_t start_fsb, xfs_fileoff_t length); @@ -59,8 +57,6 @@ int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len, int alloc_type); int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); -int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len); int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, xfs_off_t len); int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 0abba171aa89..a0229c368e78 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -238,7 +238,7 @@ _xfs_buf_alloc( */ error = xfs_buf_get_maps(bp, nmaps); if (error) { - kmem_zone_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_zone, bp); return NULL; } @@ -304,7 +304,7 @@ _xfs_buf_free_pages( * The buffer must not be on any hash - use xfs_buf_rele instead for * hashed and refcounted buffers */ -void +static void xfs_buf_free( xfs_buf_t *bp) { @@ -328,7 +328,7 @@ xfs_buf_free( kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); xfs_buf_free_maps(bp); - kmem_zone_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_zone, bp); } /* @@ -461,7 +461,7 @@ _xfs_buf_map_pages( unsigned nofs_flag; /* - * vm_map_ram() will allocate auxillary structures (e.g. + * vm_map_ram() will allocate auxiliary structures (e.g. * pagetables) with GFP_KERNEL, yet we are likely to be under * GFP_NOFS context here. Hence we need to tell memory reclaim * that we are in such a context via PF_MEMALLOC_NOFS to prevent @@ -949,7 +949,7 @@ xfs_buf_get_uncached( _xfs_buf_free_pages(bp); fail_free_buf: xfs_buf_free_maps(bp); - kmem_zone_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_zone, bp); fail: return NULL; } @@ -1261,8 +1261,7 @@ xfs_buf_ioapply_map( int map, int *buf_offset, int *count, - int op, - int op_flags) + int op) { int page_index; int total_nr_pages = bp->b_page_count; @@ -1297,7 +1296,7 @@ next_chunk: bio->bi_iter.bi_sector = sector; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; - bio_set_op_attrs(bio, op, op_flags); + bio->bi_opf = op; for (; size && nr_pages; nr_pages--, page_index++) { int rbytes, nbytes = PAGE_SIZE - offset; @@ -1342,7 +1341,6 @@ _xfs_buf_ioapply( { struct blk_plug plug; int op; - int op_flags = 0; int offset; int size; int i; @@ -1384,15 +1382,14 @@ _xfs_buf_ioapply( dump_stack(); } } - } else if (bp->b_flags & XBF_READ_AHEAD) { - op = REQ_OP_READ; - op_flags = REQ_RAHEAD; } else { op = REQ_OP_READ; + if (bp->b_flags & XBF_READ_AHEAD) + op |= REQ_RAHEAD; } /* we only use the buffer cache for meta-data */ - op_flags |= REQ_META; + op |= REQ_META; /* * Walk all the vectors issuing IO on them. Set up the initial offset @@ -1404,7 +1401,7 @@ _xfs_buf_ioapply( size = BBTOB(bp->b_length); blk_start_plug(&plug); for (i = 0; i < bp->b_map_count; i++) { - xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); + xfs_buf_ioapply_map(bp, i, &offset, &size, op); if (bp->b_error) break; if (size <= 0) @@ -2063,8 +2060,9 @@ xfs_buf_delwri_pushbuf( int __init xfs_buf_init(void) { - xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", - KM_ZONE_HWALIGN, NULL); + xfs_buf_zone = kmem_cache_create("xfs_buf", + sizeof(struct xfs_buf), 0, + SLAB_HWCACHE_ALIGN, NULL); if (!xfs_buf_zone) goto out; @@ -2077,7 +2075,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_zone_destroy(xfs_buf_zone); + kmem_cache_destroy(xfs_buf_zone); } void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index f6ce17d8d848..56e081dd1d96 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -244,7 +244,6 @@ int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ -extern void xfs_buf_free(xfs_buf_t *); extern void xfs_buf_rele(xfs_buf_t *); /* Locking and Unlocking Buffers */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d74fbd1e9d3e..3458a1264a3f 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -763,7 +763,7 @@ xfs_buf_item_init( error = xfs_buf_item_get_format(bip, bp->b_map_count); ASSERT(error == 0); if (error) { /* to stop gcc throwing set-but-unused warnings */ - kmem_zone_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_zone, bip); return error; } @@ -851,7 +851,7 @@ xfs_buf_item_log_segment( * first_bit and last_bit. */ while ((bits_to_set - bits_set) >= NBWORD) { - *wordp |= 0xffffffff; + *wordp = 0xffffffff; bits_set += NBWORD; wordp++; } @@ -939,7 +939,7 @@ xfs_buf_item_free( { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); - kmem_zone_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_zone, bip); } /* diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 283df898dd9f..0d3b640cf1cc 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -17,6 +17,7 @@ #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_trans.h" +#include "xfs_error.h" /* * Directory file type support functions @@ -47,6 +48,7 @@ xfs_dir2_sf_getdents( { int i; /* shortform entry number */ struct xfs_inode *dp = args->dp; /* incore directory inode */ + struct xfs_mount *mp = dp->i_mount; xfs_dir2_dataptr_t off; /* current entry's offset */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ @@ -68,15 +70,15 @@ xfs_dir2_sf_getdents( return 0; /* - * Precalculate offsets for . and .. as we will always need them. - * - * XXX(hch): the second argument is sometimes 0 and sometimes - * geo->datablk + * Precalculate offsets for "." and ".." as we will always need them. + * This relies on the fact that directories always start with the + * entries for "." and "..". */ dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, - dp->d_ops->data_dot_offset); + geo->data_entry_offset); dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, - dp->d_ops->data_dotdot_offset); + geo->data_entry_offset + + xfs_dir2_data_entsize(mp, sizeof(".") - 1)); /* * Put . entry unless we're starting past it. @@ -91,7 +93,7 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. */ if (ctx->pos <= dotdot_offset) { - ino = dp->d_ops->sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_parent_ino(sfp); ctx->pos = dotdot_offset & 0x7fffffff; if (!dir_emit(ctx, "..", 2, ino, DT_DIR)) return 0; @@ -108,17 +110,21 @@ xfs_dir2_sf_getdents( xfs_dir2_sf_get_offset(sfep)); if (ctx->pos > off) { - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); continue; } - ino = dp->d_ops->sf_get_ino(sfp, sfep); - filetype = dp->d_ops->sf_get_ftype(sfep); + ino = xfs_dir2_sf_get_ino(mp, sfp, sfep); + filetype = xfs_dir2_sf_get_ftype(mp, sfep); ctx->pos = off & 0x7fffffff; + if (XFS_IS_CORRUPT(dp->i_mount, + !xfs_dir2_namecheck(sfep->name, + sfep->namelen))) + return -EFSCORRUPTED; if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, - xfs_dir3_get_dtype(dp->i_mount, filetype))) + xfs_dir3_get_dtype(mp, filetype))) return 0; - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); } ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & @@ -135,17 +141,14 @@ xfs_dir2_block_getdents( struct dir_context *ctx) { struct xfs_inode *dp = args->dp; /* incore directory inode */ - xfs_dir2_data_hdr_t *hdr; /* block header */ struct xfs_buf *bp; /* buffer for block */ - xfs_dir2_data_entry_t *dep; /* block data entry */ - xfs_dir2_data_unused_t *dup; /* block unused entry */ - char *endptr; /* end of the data entries */ int error; /* error return value */ - char *ptr; /* current data entry */ int wantoff; /* starting block offset */ xfs_off_t cook; struct xfs_da_geometry *geo = args->geo; int lock_mode; + unsigned int offset; + unsigned int end; /* * If the block number in the offset is out of range, we're done. @@ -164,56 +167,55 @@ xfs_dir2_block_getdents( * We'll skip entries before this. */ wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos); - hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); - /* - * Set up values for the loop. - */ - ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = xfs_dir3_data_endp(geo, hdr); /* * Loop over the data portion of the block. * Each object is a real entry (dep) or an unused one (dup). */ - while (ptr < endptr) { + offset = geo->data_entry_offset; + end = xfs_dir3_data_end_offset(geo, bp->b_addr); + while (offset < end) { + struct xfs_dir2_data_unused *dup = bp->b_addr + offset; + struct xfs_dir2_data_entry *dep = bp->b_addr + offset; uint8_t filetype; - dup = (xfs_dir2_data_unused_t *)ptr; /* * Unused, skip it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - ptr += be16_to_cpu(dup->length); + offset += be16_to_cpu(dup->length); continue; } - dep = (xfs_dir2_data_entry_t *)ptr; - /* * Bump pointer for the next iteration. */ - ptr += dp->d_ops->data_entsize(dep->namelen); + offset += xfs_dir2_data_entsize(dp->i_mount, dep->namelen); + /* * The entry is before the desired starting point, skip it. */ - if ((char *)dep - (char *)hdr < wantoff) + if (offset < wantoff) continue; - cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, - (char *)dep - (char *)hdr); + cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, offset); ctx->pos = cook & 0x7fffffff; - filetype = dp->d_ops->data_get_ftype(dep); + filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep); /* * If it didn't fit, set the final offset to here & return. */ + if (XFS_IS_CORRUPT(dp->i_mount, + !xfs_dir2_namecheck(dep->name, + dep->namelen))) { + error = -EFSCORRUPTED; + goto out_rele; + } if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), - xfs_dir3_get_dtype(dp->i_mount, filetype))) { - xfs_trans_brelse(args->trans, bp); - return 0; - } + xfs_dir3_get_dtype(dp->i_mount, filetype))) + goto out_rele; } /* @@ -222,8 +224,9 @@ xfs_dir2_block_getdents( */ ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & 0x7fffffff; +out_rele: xfs_trans_brelse(args->trans, bp); - return 0; + return error; } /* @@ -276,7 +279,7 @@ xfs_dir2_leaf_readbuf( new_off = xfs_dir2_da_to_byte(geo, map.br_startoff); if (new_off > *cur_off) *cur_off = new_off; - error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp); + error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, 0, &bp); if (error) goto out; @@ -311,7 +314,8 @@ xfs_dir2_leaf_readbuf( break; } if (next_ra > *ra_blk) { - xfs_dir3_data_readahead(dp, next_ra, -2); + xfs_dir3_data_readahead(dp, next_ra, + XFS_DABUF_MAP_HOLE_OK); *ra_blk = next_ra; } ra_want -= geo->fsbcount; @@ -343,17 +347,17 @@ xfs_dir2_leaf_getdents( size_t bufsize) { struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp = NULL; /* data block buffer */ - xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ - char *ptr = NULL; /* pointer to current data */ struct xfs_da_geometry *geo = args->geo; xfs_dablk_t rablk = 0; /* current readahead block */ xfs_dir2_off_t curoff; /* current overall offset */ int length; /* temporary length value */ int byteoff; /* offset in current block */ int lock_mode; + unsigned int offset = 0; int error = 0; /* error return value */ /* @@ -380,7 +384,7 @@ xfs_dir2_leaf_getdents( * If we have no buffer, or we're off the end of the * current buffer, need to get another one. */ - if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { + if (!bp || offset >= geo->blksize) { if (bp) { xfs_trans_brelse(args->trans, bp); bp = NULL; @@ -393,36 +397,35 @@ xfs_dir2_leaf_getdents( if (error || !bp) break; - hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); /* * Find our position in the block. */ - ptr = (char *)dp->d_ops->data_entry_p(hdr); + offset = geo->data_entry_offset; byteoff = xfs_dir2_byte_to_off(geo, curoff); /* * Skip past the header. */ if (byteoff == 0) - curoff += dp->d_ops->data_entry_offset; + curoff += geo->data_entry_offset; /* * Skip past entries until we reach our offset. */ else { - while ((char *)ptr - (char *)hdr < byteoff) { - dup = (xfs_dir2_data_unused_t *)ptr; + while (offset < byteoff) { + dup = bp->b_addr + offset; if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { length = be16_to_cpu(dup->length); - ptr += length; + offset += length; continue; } - dep = (xfs_dir2_data_entry_t *)ptr; - length = - dp->d_ops->data_entsize(dep->namelen); - ptr += length; + dep = bp->b_addr + offset; + length = xfs_dir2_data_entsize(mp, + dep->namelen); + offset += length; } /* * Now set our real offset. @@ -430,32 +433,38 @@ xfs_dir2_leaf_getdents( curoff = xfs_dir2_db_off_to_byte(geo, xfs_dir2_byte_to_db(geo, curoff), - (char *)ptr - (char *)hdr); - if (ptr >= (char *)hdr + geo->blksize) { + offset); + if (offset >= geo->blksize) continue; - } } } + /* - * We have a pointer to an entry. - * Is it a live one? + * We have a pointer to an entry. Is it a live one? */ - dup = (xfs_dir2_data_unused_t *)ptr; + dup = bp->b_addr + offset; + /* * No, it's unused, skip over it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { length = be16_to_cpu(dup->length); - ptr += length; + offset += length; curoff += length; continue; } - dep = (xfs_dir2_data_entry_t *)ptr; - length = dp->d_ops->data_entsize(dep->namelen); - filetype = dp->d_ops->data_get_ftype(dep); + dep = bp->b_addr + offset; + length = xfs_dir2_data_entsize(mp, dep->namelen); + filetype = xfs_dir2_data_get_ftype(mp, dep); ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; + if (XFS_IS_CORRUPT(dp->i_mount, + !xfs_dir2_namecheck(dep->name, + dep->namelen))) { + error = -EFSCORRUPTED; + break; + } if (!dir_emit(ctx, (char *)dep->name, dep->namelen, be64_to_cpu(dep->inumber), xfs_dir3_get_dtype(dp->i_mount, filetype))) @@ -464,7 +473,7 @@ xfs_dir2_leaf_getdents( /* * Advance to next entry in the block. */ - ptr += length; + offset += length; curoff += length; /* bufsize may have just been a guess; don't go negative */ bufsize = bufsize > length ? bufsize - length : 0; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8ec7aab89044..cae613620175 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -13,6 +13,7 @@ #include "xfs_btree.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" +#include "xfs_discard.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_trace.h" @@ -70,7 +71,10 @@ xfs_trim_extents( error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); if (error) goto out_del_cursor; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor); + if (XFS_IS_CORRUPT(mp, i != 1)) { + error = -EFSCORRUPTED; + goto out_del_cursor; + } ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); /* diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index aeb95e7391c1..2bff21ca9d78 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -48,7 +48,7 @@ static struct lock_class_key xfs_dquot_project_class; */ void xfs_qm_dqdestroy( - xfs_dquot_t *dqp) + struct xfs_dquot *dqp) { ASSERT(list_empty(&dqp->q_lru)); @@ -56,7 +56,7 @@ xfs_qm_dqdestroy( mutex_destroy(&dqp->q_qlock); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); - kmem_zone_free(xfs_qm_dqzone, dqp); + kmem_cache_free(xfs_qm_dqzone, dqp); } /* @@ -113,8 +113,8 @@ xfs_qm_adjust_dqlimits( */ void xfs_qm_adjust_dqtimers( - xfs_mount_t *mp, - xfs_disk_dquot_t *d) + struct xfs_mount *mp, + struct xfs_disk_dquot *d) { ASSERT(d->d_id); @@ -305,8 +305,8 @@ xfs_dquot_disk_alloc( /* Create the block mapping. */ xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, - XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps); + XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map, + &nmaps); if (error) return error; ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); @@ -497,7 +497,7 @@ xfs_dquot_from_disk( struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; /* copy everything from disk dquot to the incore dquot */ - memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); + memcpy(&dqp->q_core, ddqp, sizeof(struct xfs_disk_dquot)); /* * Reservation counters are defined as reservation plus current usage @@ -833,7 +833,7 @@ xfs_qm_id_for_quotatype( case XFS_DQ_GROUP: return ip->i_d.di_gid; case XFS_DQ_PROJ: - return xfs_get_projid(ip); + return ip->i_d.di_projid; } ASSERT(0); return 0; @@ -989,7 +989,7 @@ xfs_qm_dqput( */ void xfs_qm_dqrele( - xfs_dquot_t *dqp) + struct xfs_dquot *dqp) { if (!dqp) return; @@ -1018,8 +1018,8 @@ xfs_qm_dqflush_done( struct xfs_buf *bp, struct xfs_log_item *lip) { - xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; - xfs_dquot_t *dqp = qip->qli_dquot; + struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip; + struct xfs_dquot *dqp = qip->qli_dquot; struct xfs_ail *ailp = lip->li_ailp; /* @@ -1126,11 +1126,11 @@ xfs_qm_dqflush( xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return -EIO; + return -EFSCORRUPTED; } /* This is the only portion of data that needs to persist */ - memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); + memcpy(ddqp, &dqp->q_core, sizeof(struct xfs_disk_dquot)); /* * Clear the dirty field and remember the flush lsn for later use. @@ -1188,8 +1188,8 @@ out_unlock: */ void xfs_dqlock2( - xfs_dquot_t *d1, - xfs_dquot_t *d2) + struct xfs_dquot *d1, + struct xfs_dquot *d2) { if (d1 && d2) { ASSERT(d1 != d2); @@ -1211,20 +1211,22 @@ xfs_dqlock2( int __init xfs_qm_init(void) { - xfs_qm_dqzone = - kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot"); + xfs_qm_dqzone = kmem_cache_create("xfs_dquot", + sizeof(struct xfs_dquot), + 0, 0, NULL); if (!xfs_qm_dqzone) goto out; - xfs_qm_dqtrxzone = - kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx"); + xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx", + sizeof(struct xfs_dquot_acct), + 0, 0, NULL); if (!xfs_qm_dqtrxzone) goto out_free_dqzone; return 0; out_free_dqzone: - kmem_zone_destroy(xfs_qm_dqzone); + kmem_cache_destroy(xfs_qm_dqzone); out: return -ENOMEM; } @@ -1232,8 +1234,8 @@ out: void xfs_qm_exit(void) { - kmem_zone_destroy(xfs_qm_dqtrxzone); - kmem_zone_destroy(xfs_qm_dqzone); + kmem_cache_destroy(xfs_qm_dqtrxzone); + kmem_cache_destroy(xfs_qm_dqzone); } /* diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 4fe85709d55d..fe3e46df604b 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -30,33 +30,36 @@ enum { /* * The incore dquot structure */ -typedef struct xfs_dquot { - uint dq_flags; /* various flags (XFS_DQ_*) */ - struct list_head q_lru; /* global free list of dquots */ - struct xfs_mount*q_mount; /* filesystem this relates to */ - uint q_nrefs; /* # active refs from inodes */ - xfs_daddr_t q_blkno; /* blkno of dquot buffer */ - int q_bufoffset; /* off of dq in buffer (# dquots) */ - xfs_fileoff_t q_fileoffset; /* offset in quotas file */ - - xfs_disk_dquot_t q_core; /* actual usage & quotas */ - xfs_dq_logitem_t q_logitem; /* dquot log item */ - xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ - xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ - xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ - xfs_qcnt_t q_prealloc_lo_wmark;/* prealloc throttle wmark */ - xfs_qcnt_t q_prealloc_hi_wmark;/* prealloc disabled wmark */ - int64_t q_low_space[XFS_QLOWSP_MAX]; - struct mutex q_qlock; /* quota lock */ - struct completion q_flush; /* flush completion queue */ - atomic_t q_pincount; /* dquot pin count */ - wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ -} xfs_dquot_t; +struct xfs_dquot { + uint dq_flags; + struct list_head q_lru; + struct xfs_mount *q_mount; + uint q_nrefs; + xfs_daddr_t q_blkno; + int q_bufoffset; + xfs_fileoff_t q_fileoffset; + + struct xfs_disk_dquot q_core; + struct xfs_dq_logitem q_logitem; + /* total regular nblks used+reserved */ + xfs_qcnt_t q_res_bcount; + /* total inos allocd+reserved */ + xfs_qcnt_t q_res_icount; + /* total realtime blks used+reserved */ + xfs_qcnt_t q_res_rtbcount; + xfs_qcnt_t q_prealloc_lo_wmark; + xfs_qcnt_t q_prealloc_hi_wmark; + int64_t q_low_space[XFS_QLOWSP_MAX]; + struct mutex q_qlock; + struct completion q_flush; + atomic_t q_pincount; + struct wait_queue_head q_pinwait; +}; /* * Lock hierarchy for q_qlock: * XFS_QLOCK_NORMAL is the implicit default, - * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 + * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 */ enum { XFS_QLOCK_NORMAL = 0, @@ -64,21 +67,21 @@ enum { }; /* - * Manage the q_flush completion queue embedded in the dquot. This completion + * Manage the q_flush completion queue embedded in the dquot. This completion * queue synchronizes processes attempting to flush the in-core dquot back to * disk. */ -static inline void xfs_dqflock(xfs_dquot_t *dqp) +static inline void xfs_dqflock(struct xfs_dquot *dqp) { wait_for_completion(&dqp->q_flush); } -static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp) +static inline bool xfs_dqflock_nowait(struct xfs_dquot *dqp) { return try_wait_for_completion(&dqp->q_flush); } -static inline void xfs_dqfunlock(xfs_dquot_t *dqp) +static inline void xfs_dqfunlock(struct xfs_dquot *dqp) { complete(&dqp->q_flush); } @@ -112,7 +115,7 @@ static inline int xfs_this_quota_on(struct xfs_mount *mp, int type) } } -static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) +static inline struct xfs_dquot *xfs_inode_dquot(struct xfs_inode *ip, int type) { switch (type & XFS_DQ_ALLTYPES) { case XFS_DQ_USER: @@ -147,31 +150,30 @@ static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp) #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) -extern void xfs_qm_dqdestroy(xfs_dquot_t *); -extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **); -extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); -extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, - xfs_disk_dquot_t *); -extern void xfs_qm_adjust_dqlimits(struct xfs_mount *, - struct xfs_dquot *); -extern xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, - uint type); -extern int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id, +void xfs_qm_dqdestroy(struct xfs_dquot *dqp); +int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp); +void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp); +void xfs_qm_adjust_dqtimers(struct xfs_mount *mp, + struct xfs_disk_dquot *d); +void xfs_qm_adjust_dqlimits(struct xfs_mount *mp, + struct xfs_dquot *d); +xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type); +int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id, uint type, bool can_alloc, struct xfs_dquot **dqpp); -extern int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type, - bool can_alloc, - struct xfs_dquot **dqpp); -extern int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id, +int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type, + bool can_alloc, + struct xfs_dquot **dqpp); +int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id, uint type, struct xfs_dquot **dqpp); -extern int xfs_qm_dqget_uncached(struct xfs_mount *mp, - xfs_dqid_t id, uint type, - struct xfs_dquot **dqpp); -extern void xfs_qm_dqput(xfs_dquot_t *); +int xfs_qm_dqget_uncached(struct xfs_mount *mp, + xfs_dqid_t id, uint type, + struct xfs_dquot **dqpp); +void xfs_qm_dqput(struct xfs_dquot *dqp); -extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); +void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); -extern void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); +void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) { diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 1aed34ccdabc..3bb19e556ade 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -11,25 +11,27 @@ struct xfs_trans; struct xfs_mount; struct xfs_qoff_logitem; -typedef struct xfs_dq_logitem { - struct xfs_log_item qli_item; /* common portion */ - struct xfs_dquot *qli_dquot; /* dquot ptr */ - xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ -} xfs_dq_logitem_t; +struct xfs_dq_logitem { + struct xfs_log_item qli_item; /* common portion */ + struct xfs_dquot *qli_dquot; /* dquot ptr */ + xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ +}; -typedef struct xfs_qoff_logitem { - struct xfs_log_item qql_item; /* common portion */ - struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ +struct xfs_qoff_logitem { + struct xfs_log_item qql_item; /* common portion */ + struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ unsigned int qql_flags; -} xfs_qoff_logitem_t; +}; -extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); -extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, - struct xfs_qoff_logitem *, uint); -extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, - struct xfs_qoff_logitem *, uint); -extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, - struct xfs_qoff_logitem *); +void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp); +struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp, + struct xfs_qoff_logitem *start, + uint flags); +struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp, + struct xfs_qoff_logitem *startqoff, + uint flags); +void xfs_trans_log_quotaoff_item(struct xfs_trans *tp, + struct xfs_qoff_logitem *qlp); #endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 849fd4476950..331765afc53e 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -257,7 +257,7 @@ xfs_errortag_test( xfs_warn_ratelimited(mp, "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", - expression, file, line, mp->m_fsname); + expression, file, line, mp->m_super->s_id); return true; } @@ -329,19 +329,40 @@ xfs_corruption_error( const char *tag, int level, struct xfs_mount *mp, - void *buf, + const void *buf, size_t bufsize, const char *filename, int linenum, xfs_failaddr_t failaddr) { - if (level <= xfs_error_level) + if (buf && level <= xfs_error_level) xfs_hex_dump(buf, bufsize); xfs_error_report(tag, level, mp, filename, linenum, failaddr); xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); } /* + * Complain about the kinds of metadata corruption that we can't detect from a + * verifier, such as incorrect inter-block relationship data. Does not set + * bp->b_error. + */ +void +xfs_buf_corruption_error( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_mount; + + xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, + "Metadata corruption detected at %pS, %s block 0x%llx", + __return_address, bp->b_ops->name, bp->b_bn); + + xfs_alert(mp, "Unmount and run xfs_repair"); + + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); +} + +/* * Warnings specifically for verifier errors. Differentiate CRC vs. invalid * values, and omit the stack trace unless the error level is tuned high. */ @@ -350,7 +371,7 @@ xfs_buf_verifier_error( struct xfs_buf *bp, int error, const char *name, - void *buf, + const void *buf, size_t bufsz, xfs_failaddr_t failaddr) { @@ -402,7 +423,7 @@ xfs_inode_verifier_error( struct xfs_inode *ip, int error, const char *name, - void *buf, + const void *buf, size_t bufsz, xfs_failaddr_t failaddr) { diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 602aa7d62b66..31a5d321ba9a 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -12,16 +12,17 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, const char *filename, int linenum, xfs_failaddr_t failaddr); extern void xfs_corruption_error(const char *tag, int level, - struct xfs_mount *mp, void *buf, size_t bufsize, + struct xfs_mount *mp, const void *buf, size_t bufsize, const char *filename, int linenum, xfs_failaddr_t failaddr); +void xfs_buf_corruption_error(struct xfs_buf *bp); extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, - const char *name, void *buf, size_t bufsz, + const char *name, const void *buf, size_t bufsz, xfs_failaddr_t failaddr); extern void xfs_verifier_error(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, - const char *name, void *buf, size_t bufsz, + const char *name, const void *buf, size_t bufsz, xfs_failaddr_t failaddr); #define XFS_ERROR_REPORT(e, lvl, mp) \ @@ -37,32 +38,6 @@ extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, /* Dump 128 bytes of any corrupt buffer */ #define XFS_CORRUPTION_DUMP_LEN (128) -/* - * Macros to set EFSCORRUPTED & return/branch. - */ -#define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \ - { \ - int fs_is_ok = (x); \ - ASSERT(fs_is_ok); \ - if (unlikely(!fs_is_ok)) { \ - XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ - XFS_ERRLEVEL_LOW, mp); \ - error = -EFSCORRUPTED; \ - goto l; \ - } \ - } - -#define XFS_WANT_CORRUPTED_RETURN(mp, x) \ - { \ - int fs_is_ok = (x); \ - ASSERT(fs_is_ok); \ - if (unlikely(!fs_is_ok)) { \ - XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ - XFS_ERRLEVEL_LOW, mp); \ - return -EFSCORRUPTED; \ - } \ - } - #ifdef DEBUG extern int xfs_errortag_init(struct xfs_mount *mp); extern void xfs_errortag_del(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 2183d87be4cf..3991e59cfd18 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -367,7 +367,7 @@ restart: * If this is a metadata allocation, try to reuse the busy * extent instead of trimming the allocation. */ - if (!xfs_alloc_is_userdata(args->datatype) && + if (!(args->datatype & XFS_ALLOC_USERDATA) && !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) { if (!xfs_extent_busy_update_extent(args->mp, args->pag, busyp, fbno, flen, diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index e44efc41a041..6ea847f6e298 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -21,7 +21,7 @@ #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_trace.h" - +#include "xfs_error.h" kmem_zone_t *xfs_efi_zone; kmem_zone_t *xfs_efd_zone; @@ -39,7 +39,7 @@ xfs_efi_item_free( if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kmem_free(efip); else - kmem_zone_free(xfs_efi_zone, efip); + kmem_cache_free(xfs_efi_zone, efip); } /* @@ -228,6 +228,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; } @@ -243,7 +244,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kmem_free(efdp); else - kmem_zone_free(xfs_efd_zone, efdp); + kmem_cache_free(xfs_efd_zone, efdp); } /* @@ -624,7 +625,7 @@ xfs_efi_recover( */ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); xfs_efi_release(efip); - return -EIO; + return -EFSCORRUPTED; } } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c0620135a279..c93250108952 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -188,7 +188,8 @@ xfs_file_dio_aio_read( file_accessed(iocb->ki_filp); xfs_ilock(ip, XFS_IOLOCK_SHARED); - ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL, is_sync_kiocb(iocb)); + ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, + is_sync_kiocb(iocb)); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -215,7 +216,7 @@ xfs_file_dax_read( xfs_ilock(ip, XFS_IOLOCK_SHARED); } - ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -351,7 +352,7 @@ restart: trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); error = iomap_zero_range(inode, isize, iocb->ki_pos - isize, - NULL, &xfs_iomap_ops); + NULL, &xfs_buffered_write_iomap_ops); if (error) return error; } else @@ -486,8 +487,7 @@ xfs_file_dio_aio_write( int unaligned_io = 0; int iolock; size_t count = iov_iter_count(from); - struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); /* DIO must be aligned to device logical sector size */ if ((iocb->ki_pos | count) & target->bt_logical_sectormask) @@ -551,7 +551,8 @@ xfs_file_dio_aio_write( * If unaligned, this is the only IO in-flight. Wait on it before we * release the iolock to prevent subsequent overlapping IO. */ - ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops, + ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, + &xfs_dio_write_ops, is_sync_kiocb(iocb) || unaligned_io); out: xfs_iunlock(ip, iolock); @@ -591,7 +592,7 @@ xfs_file_dax_write( count = iov_iter_count(from); trace_xfs_file_dax_write(ip, count, pos); - ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -638,7 +639,8 @@ write_retry: current->backing_dev_info = inode_to_bdi(inode); trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); - ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); + ret = iomap_file_buffered_write(iocb, from, + &xfs_buffered_write_iomap_ops); if (likely(ret >= 0)) iocb->ki_pos += ret; @@ -815,6 +817,36 @@ xfs_file_fallocate( if (error) goto out_unlock; + /* + * Must wait for all AIO to complete before we continue as AIO can + * change the file size on completion without holding any locks we + * currently hold. We must do this first because AIO can update both + * the on disk and in memory inode sizes, and the operations that follow + * require the in-memory size to be fully up-to-date. + */ + inode_dio_wait(inode); + + /* + * Now AIO and DIO has drained we flush and (if necessary) invalidate + * the cached range over the first operation we are about to run. + * + * We care about zero and collapse here because they both run a hole + * punch over the range first. Because that can zero data, and the range + * of invalidation for the shift operations is much larger, we still do + * the required flush for collapse in xfs_prepare_shift(). + * + * Insert has the same range requirements as collapse, and we extend the + * file first which can zero data. Hence insert has the same + * flush/invalidate requirements as collapse and so they are both + * handled at the right time by xfs_prepare_shift(). + */ + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE | + FALLOC_FL_COLLAPSE_RANGE)) { + error = xfs_flush_unmap_range(ip, offset, len); + if (error) + goto out_unlock; + } + if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) @@ -878,16 +910,30 @@ xfs_file_fallocate( } if (mode & FALLOC_FL_ZERO_RANGE) { - error = xfs_zero_file_space(ip, offset, len); + /* + * Punch a hole and prealloc the range. We use a hole + * punch rather than unwritten extent conversion for two + * reasons: + * + * 1.) Hole punch handles partial block zeroing for us. + * 2.) If prealloc returns ENOSPC, the file range is + * still zero-valued by virtue of the hole punch. + */ + unsigned int blksize = i_blocksize(inode); + + trace_xfs_zero_file_space(ip); + + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out_unlock; + + len = round_up(offset + len, blksize) - + round_down(offset, blksize); + offset = round_down(offset, blksize); } else if (mode & FALLOC_FL_UNSHARE_RANGE) { error = xfs_reflink_unshare(ip, offset, len); if (error) goto out_unlock; - - if (!xfs_is_always_cow_inode(ip)) { - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); - } } else { /* * If always_cow mode we can't use preallocations and @@ -897,12 +943,14 @@ xfs_file_fallocate( error = -EOPNOTSUPP; goto out_unlock; } + } + if (!xfs_is_always_cow_inode(ip)) { error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC); + if (error) + goto out_unlock; } - if (error) - goto out_unlock; } if (file->f_flags & O_DSYNC) @@ -1056,7 +1104,7 @@ xfs_dir_open( */ mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - error = xfs_dir3_data_readahead(ip, 0, -1); + error = xfs_dir3_data_readahead(ip, 0, 0); xfs_iunlock(ip, mode); return error; } @@ -1153,12 +1201,16 @@ __xfs_filemap_fault( if (IS_DAX(inode)) { pfn_t pfn; - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, + (write_fault && !vmf->cow_page) ? + &xfs_direct_write_iomap_ops : + &xfs_read_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { if (write_fault) - ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, + &xfs_buffered_write_iomap_ops); else ret = filemap_fault(vmf); } @@ -1222,22 +1274,22 @@ static const struct vm_operations_struct xfs_file_vm_ops = { STATIC int xfs_file_mmap( - struct file *filp, - struct vm_area_struct *vma) + struct file *file, + struct vm_area_struct *vma) { - struct dax_device *dax_dev; + struct inode *inode = file_inode(file); + struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode)); - dax_dev = xfs_find_daxdev_for_inode(file_inode(filp)); /* * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. */ - if (!daxdev_mapping_supported(vma, dax_dev)) + if (!daxdev_mapping_supported(vma, target->bt_daxdev)) return -EOPNOTSUPP; - file_accessed(filp); + file_accessed(file); vma->vm_ops = &xfs_file_vm_ops; - if (IS_DAX(file_inode(filp))) + if (IS_DAX(inode)) vma->vm_flags |= VM_HUGEPAGE; return 0; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 574a7a8b4736..5f12b5d8527a 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -18,6 +18,7 @@ #include "xfs_trace.h" #include "xfs_ag_resv.h" #include "xfs_trans.h" +#include "xfs_filestream.h" struct xfs_fstrm_item { struct xfs_mru_cache_elem mru; @@ -374,7 +375,7 @@ xfs_filestream_new_ag( startag = (item->ag + 1) % mp->m_sb.sb_agcount; } - if (xfs_alloc_is_userdata(ap->datatype)) + if (ap->datatype & XFS_ALLOC_USERDATA) flags |= XFS_PICK_USERDATA; if (ap->tp->t_flags & XFS_TRANS_LOWMODE) flags |= XFS_PICK_LOWSPACE; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index d082143feb5a..918456ca29e1 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -146,6 +146,7 @@ xfs_fsmap_owner_from_rmap( dest->fmr_owner = XFS_FMR_OWN_FREE; break; default: + ASSERT(0); return -EFSCORRUPTED; } return 0; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 944add5ff8e0..8dc2e5414276 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -44,7 +44,7 @@ xfs_inode_alloc( if (!ip) return NULL; if (inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_zone_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_zone, ip); return NULL; } @@ -104,7 +104,7 @@ xfs_inode_free_callback( ip->i_itemp = NULL; } - kmem_zone_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_zone, ip); } static void @@ -1419,7 +1419,7 @@ xfs_inode_match_id( return 0; if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && - xfs_get_projid(ip) != eofb->eof_prid) + ip->i_d.di_projid != eofb->eof_prid) return 0; return 1; @@ -1443,7 +1443,7 @@ xfs_inode_match_id_union( return 1; if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && - xfs_get_projid(ip) == eofb->eof_prid) + ip->i_d.di_projid == eofb->eof_prid) return 1; return 0; diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 3ebd1b7f49d8..490fee22b878 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -55,7 +55,7 @@ STATIC void xfs_icreate_item_release( struct xfs_log_item *lip) { - kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip)); + kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); } static const struct xfs_item_ops xfs_icreate_item_ops = { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 18f4b262e61c..401da197f012 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -55,6 +55,12 @@ xfs_extlen_t xfs_get_extsz_hint( struct xfs_inode *ip) { + /* + * No point in aligning allocations if we need to COW to actually + * write to them. + */ + if (xfs_is_always_cow_inode(ip)) + return 0; if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) return ip->i_d.di_extsize; if (XFS_IS_REALTIME_INODE(ip)) @@ -809,7 +815,7 @@ xfs_ialloc( ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); inode->i_rdev = rdev; - xfs_set_projid(ip, prid); + ip->i_d.di_projid = prid; if (pip && XFS_INHERIT_GID(pip)) { ip->i_d.di_gid = pip->i_d.di_gid; @@ -845,8 +851,7 @@ xfs_ialloc( inode_set_iversion(inode, 1); ip->i_d.di_flags2 = 0; ip->i_d.di_cowextsize = 0; - ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; - ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec; + ip->i_d.di_crtime = tv; } @@ -1418,7 +1423,7 @@ xfs_link( * the tree quota mechanism could be circumvented. */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { + tdp->i_d.di_projid != sip->i_d.di_projid)) { error = -EXDEV; goto error_return; } @@ -2130,8 +2135,10 @@ xfs_iunlink_update_bucket( * passed in because either we're adding or removing ourselves from the * head of the list. */ - if (old_value == new_agino) + if (old_value == new_agino) { + xfs_buf_corruption_error(agibp); return -EFSCORRUPTED; + } agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); offset = offsetof(struct xfs_agi, agi_unlinked) + @@ -2194,6 +2201,8 @@ xfs_iunlink_update_inode( /* Make sure the old pointer isn't garbage. */ old_value = be32_to_cpu(dip->di_next_unlinked); if (!xfs_verify_agino_or_null(mp, agno, old_value)) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, + sizeof(*dip), __this_address); error = -EFSCORRUPTED; goto out; } @@ -2205,8 +2214,11 @@ xfs_iunlink_update_inode( */ *old_next_agino = old_value; if (old_value == next_agino) { - if (next_agino != NULLAGINO) + if (next_agino != NULLAGINO) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, + dip, sizeof(*dip), __this_address); error = -EFSCORRUPTED; + } goto out; } @@ -2257,8 +2269,10 @@ xfs_iunlink( */ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); if (next_agino == agino || - !xfs_verify_agino_or_null(mp, agno, next_agino)) + !xfs_verify_agino_or_null(mp, agno, next_agino)) { + xfs_buf_corruption_error(agibp); return -EFSCORRUPTED; + } if (next_agino != NULLAGINO) { struct xfs_perag *pag; @@ -3196,6 +3210,7 @@ xfs_rename( struct xfs_trans *tp; struct xfs_inode *wip = NULL; /* whiteout inode */ struct xfs_inode *inodes[__XFS_SORT_INODES]; + struct xfs_buf *agibp; int num_inodes = __XFS_SORT_INODES; bool new_parent = (src_dp != target_dp); bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); @@ -3270,7 +3285,7 @@ xfs_rename( * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { + target_dp->i_d.di_projid != src_ip->i_d.di_projid)) { error = -EXDEV; goto out_trans_cancel; } @@ -3327,7 +3342,6 @@ xfs_rename( goto out_trans_cancel; xfs_bumplink(tp, wip); - xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); VFS_I(wip)->i_state &= ~I_LINKABLE; } @@ -3361,6 +3375,22 @@ xfs_rename( * In case there is already an entry with the same * name at the destination directory, remove it first. */ + + /* + * Check whether the replace operation will need to allocate + * blocks. This happens when the shortform directory lacks + * space and we have to convert it to a block format directory. + * When more blocks are necessary, we must lock the AGI first + * to preserve locking order (AGI -> AGF). + */ + if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) { + error = xfs_read_agi(mp, tp, + XFS_INO_TO_AGNO(mp, target_ip->i_ino), + &agibp); + if (error) + goto out_trans_cancel; + } + error = xfs_dir_replace(tp, target_dp, target_name, src_ip->i_ino, spaceres); if (error) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 558173f95a03..492e53992fa9 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -37,9 +37,6 @@ typedef struct xfs_inode { struct xfs_ifork *i_cowfp; /* copy on write extents */ struct xfs_ifork i_df; /* data fork */ - /* operations vectors */ - const struct xfs_dir_ops *d_ops; /* directory ops vector */ - /* Transaction and locking information. */ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ @@ -177,30 +174,11 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags) return ret; } -/* - * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was chosen - * to retain compatibility with "old" filesystems). - */ -static inline prid_t -xfs_get_projid(struct xfs_inode *ip) -{ - return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo; -} - -static inline void -xfs_set_projid(struct xfs_inode *ip, - prid_t projid) -{ - ip->i_d.di_projid_hi = (uint16_t) (projid >> 16); - ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff); -} - static inline prid_t xfs_get_initial_prid(struct xfs_inode *dp) { if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - return xfs_get_projid(dp); + return dp->i_d.di_projid; return XFS_PROJID_DEFAULT; } @@ -220,6 +198,13 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) } /* + * Return the buftarg used for data allocations on a given inode. + */ +#define xfs_inode_buftarg(ip) \ + (XFS_IS_REALTIME_INODE(ip) ? \ + (ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp) + +/* * In-core inode flags. */ #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index bb8f076805b9..8bd5d0de6321 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -17,6 +17,7 @@ #include "xfs_trans_priv.h" #include "xfs_buf_item.h" #include "xfs_log.h" +#include "xfs_error.h" #include <linux/iversion.h> @@ -309,8 +310,8 @@ xfs_inode_to_log_dinode( to->di_format = from->di_format; to->di_uid = from->di_uid; to->di_gid = from->di_gid; - to->di_projid_lo = from->di_projid_lo; - to->di_projid_hi = from->di_projid_hi; + to->di_projid_lo = from->di_projid & 0xffff; + to->di_projid_hi = from->di_projid >> 16; memset(to->di_pad, 0, sizeof(to->di_pad)); memset(to->di_pad3, 0, sizeof(to->di_pad3)); @@ -340,8 +341,8 @@ xfs_inode_to_log_dinode( if (from->di_version == 3) { to->di_changecount = inode_peek_iversion(inode); - to->di_crtime.t_sec = from->di_crtime.t_sec; - to->di_crtime.t_nsec = from->di_crtime.t_nsec; + to->di_crtime.t_sec = from->di_crtime.tv_sec; + to->di_crtime.t_nsec = from->di_crtime.tv_nsec; to->di_flags2 = from->di_flags2; to->di_cowextsize = from->di_cowextsize; to->di_ino = ip->i_ino; @@ -666,7 +667,7 @@ xfs_inode_item_destroy( xfs_inode_t *ip) { kmem_free(ip->i_itemp->ili_item.li_lv_shadow); - kmem_zone_free(xfs_ili_zone, ip->i_itemp); + kmem_cache_free(xfs_ili_zone, ip->i_itemp); } @@ -828,8 +829,10 @@ xfs_inode_item_format_convert( { struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; - if (buf->i_len != sizeof(*in_f32)) + if (buf->i_len != sizeof(*in_f32)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; + } in_f->ilf_type = in_f32->ilf_type; in_f->ilf_size = in_f32->ilf_size; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d58f0d6a699e..7b35d62ede9f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -33,6 +33,8 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_health.h" +#include "xfs_reflink.h" +#include "xfs_ioctl.h" #include <linux/mount.h> #include <linux/namei.h> @@ -290,82 +292,6 @@ xfs_readlink_by_handle( return error; } -int -xfs_set_dmattrs( - xfs_inode_t *ip, - uint evmask, - uint16_t state) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_trans_t *tp; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); - if (error) - return error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - ip->i_d.di_dmevmask = evmask; - ip->i_d.di_dmstate = state; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp); - - return error; -} - -STATIC int -xfs_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - struct fsdmidata fsd; - xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; - - if (!capable(CAP_MKNOD)) - return -EPERM; - if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) - return -EFAULT; - - error = mnt_want_write_file(parfilp); - if (error) - return error; - - dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) { - mnt_drop_write_file(parfilp); - return PTR_ERR(dentry); - } - - if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) { - error = -EPERM; - goto out; - } - - if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - error = -EFAULT; - goto out; - } - - error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - - out: - mnt_drop_write_file(parfilp); - dput(dentry); - return error; -} - STATIC int xfs_attrlist_by_handle( struct file *parfilp, @@ -588,13 +514,12 @@ xfs_attrmulti_by_handle( int xfs_ioc_space( struct file *filp, - unsigned int cmd, xfs_flock64_t *bf) { struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct iattr iattr; - enum xfs_prealloc_flags flags = 0; + enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR; uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; int error; @@ -607,6 +532,9 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -EINVAL; + if (xfs_is_always_cow_inode(ip)) + return -EOPNOTSUPP; + if (filp->f_flags & O_DSYNC) flags |= XFS_PREALLOC_SYNC; if (filp->f_mode & FMODE_NOCMTIME) @@ -620,6 +548,7 @@ xfs_ioc_space( error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); if (error) goto out_unlock; + inode_dio_wait(inode); switch (bf->l_whence) { case 0: /*SEEK_SET*/ @@ -635,73 +564,21 @@ xfs_ioc_space( goto out_unlock; } - /* - * length of <= 0 for resv/unresv/zero is invalid. length for - * alloc/free is ignored completely and we have no idea what userspace - * might have set it to, so set it to zero to allow range - * checks to pass. - */ - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - if (bf->l_len <= 0) { - error = -EINVAL; - goto out_unlock; - } - break; - default: - bf->l_len = 0; - break; - } - - if (bf->l_start < 0 || - bf->l_start > inode->i_sb->s_maxbytes || - bf->l_start + bf->l_len < 0 || - bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) { + if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) { error = -EINVAL; goto out_unlock; } - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - flags |= XFS_PREALLOC_SET; - error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); - break; - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - flags |= XFS_PREALLOC_SET; - error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, - XFS_BMAPI_PREALLOC); - break; - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - error = xfs_free_file_space(ip, bf->l_start, bf->l_len); - break; - case XFS_IOC_ALLOCSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP: - case XFS_IOC_FREESP64: - flags |= XFS_PREALLOC_CLEAR; - if (bf->l_start > XFS_ISIZE(ip)) { - error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); - if (error) - goto out_unlock; - } - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = bf->l_start; - - error = xfs_vn_setattr_size(file_dentry(filp), &iattr); - break; - default: - ASSERT(0); - error = -EINVAL; + if (bf->l_start > XFS_ISIZE(ip)) { + error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), + bf->l_start - XFS_ISIZE(ip), 0); + if (error) + goto out_unlock; } + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = bf->l_start; + error = xfs_vn_setattr_size(file_dentry(filp), &iattr); if (error) goto out_unlock; @@ -1116,7 +993,7 @@ xfs_fill_fsxattr( fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; fa->fsx_cowextsize = ip->i_d.di_cowextsize << ip->i_mount->m_sb.sb_blocklog; - fa->fsx_projid = xfs_get_projid(ip); + fa->fsx_projid = ip->i_d.di_projid; if (attr) { if (ip->i_afp) { @@ -1311,10 +1188,9 @@ xfs_ioctl_setattr_dax_invalidate( * have to check the device for dax support or flush pagecache. */ if (fa->fsx_xflags & FS_XFLAG_DAX) { - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) - return -EINVAL; - if (!bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), - sb->s_blocksize)) + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + + if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize)) return -EINVAL; } @@ -1569,7 +1445,7 @@ xfs_ioctl_setattr( } if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && - xfs_get_projid(ip) != fa->fsx_projid) { + ip->i_d.di_projid != fa->fsx_projid) { code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -1606,13 +1482,13 @@ xfs_ioctl_setattr( VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID); /* Change the ownerships and register project quota modifications */ - if (xfs_get_projid(ip) != fa->fsx_projid) { + if (ip->i_d.di_projid != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } ASSERT(ip->i_d.di_version > 1); - xfs_set_projid(ip, fa->fsx_projid); + ip->i_d.di_projid = fa->fsx_projid; } /* @@ -2122,24 +1998,17 @@ xfs_file_ioctl( return xfs_ioc_setlabel(filp, mp, arg); case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_ZERO_RANGE: { + case XFS_IOC_FREESP64: { xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) return -EFAULT; - return xfs_ioc_space(filp, cmd, &bf); + return xfs_ioc_space(filp, &bf); } case XFS_IOC_DIOINFO: { - struct dioattr da; - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + struct dioattr da; da.d_mem = da.d_miniosz = target->bt_logical_sectorsize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); @@ -2183,22 +2052,6 @@ xfs_file_ioctl( case XFS_IOC_SETXFLAGS: return xfs_ioc_setxflags(ip, filp, arg); - case XFS_IOC_FSSETDM: { - struct fsdmidata dmi; - - if (copy_from_user(&dmi, arg, sizeof(dmi))) - return -EFAULT; - - error = mnt_want_write_file(filp); - if (error) - return error; - - error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, - dmi.fsd_dmstate); - mnt_drop_write_file(filp); - return error; - } - case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: @@ -2226,8 +2079,6 @@ xfs_file_ioctl( return -EFAULT; return xfs_open_by_handle(filp, &hreq); } - case XFS_IOC_FSSETDM_BY_HANDLE: - return xfs_fssetdm_by_handle(filp, arg); case XFS_IOC_READLINK_BY_HANDLE: { xfs_fsop_handlereq_t hreq; diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 654c0bb1bcf8..420bd95dc326 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -9,7 +9,6 @@ extern int xfs_ioc_space( struct file *filp, - unsigned int cmd, xfs_flock64_t *bf); int @@ -71,12 +70,6 @@ xfs_file_compat_ioctl( unsigned int cmd, unsigned long arg); -extern int -xfs_set_dmattrs( - struct xfs_inode *ip, - uint evmask, - uint16_t state); - struct xfs_ibulk; struct xfs_bstat; struct xfs_inogrp; diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 1e08bf79b478..c4c4f09113d3 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -500,44 +500,6 @@ xfs_compat_attrmulti_by_handle( return error; } -STATIC int -xfs_compat_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - struct fsdmidata fsd; - compat_xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; - - if (!capable(CAP_MKNOD)) - return -EPERM; - if (copy_from_user(&dmhreq, arg, - sizeof(compat_xfs_fsop_setdm_handlereq_t))) - return -EFAULT; - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) { - error = -EPERM; - goto out; - } - - if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { - error = -EFAULT; - goto out; - } - - error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - -out: - dput(dentry); - return error; -} - long xfs_file_compat_ioctl( struct file *filp, @@ -557,18 +519,13 @@ xfs_file_compat_ioctl( case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: case XFS_IOC_ALLOCSP64_32: - case XFS_IOC_FREESP64_32: - case XFS_IOC_RESVSP_32: - case XFS_IOC_UNRESVSP_32: - case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: - case XFS_IOC_ZERO_RANGE_32: { + case XFS_IOC_FREESP64_32: { struct xfs_flock64 bf; if (xfs_compat_flock64_copyin(&bf, arg)) return -EFAULT; cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - return xfs_ioc_space(filp, cmd, &bf); + return xfs_ioc_space(filp, &bf); } case XFS_IOC_FSGEOMETRY_V1_32: return xfs_compat_ioc_fsgeometry_v1(mp, arg); @@ -651,8 +608,6 @@ xfs_file_compat_ioctl( return xfs_compat_attrlist_by_handle(filp, arg); case XFS_IOC_ATTRMULTI_BY_HANDLE_32: return xfs_compat_attrmulti_by_handle(filp, arg); - case XFS_IOC_FSSETDM_BY_HANDLE_32: - return xfs_compat_fssetdm_by_handle(filp, arg); default: /* try the native version */ return xfs_file_ioctl(filp, cmd, (unsigned long)arg); diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 7985344d3aa6..8c7743cd490e 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -99,7 +99,7 @@ typedef struct compat_xfs_fsop_handlereq { _IOWR('X', 108, struct compat_xfs_fsop_handlereq) /* The bstat field in the swapext struct needs translation */ -typedef struct compat_xfs_swapext { +struct compat_xfs_swapext { int64_t sx_version; /* version */ int64_t sx_fdtarget; /* fd of target file */ int64_t sx_fdtmp; /* fd of tmp file */ @@ -107,7 +107,7 @@ typedef struct compat_xfs_swapext { xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */ -} __compat_packed compat_xfs_swapext_t; +} __compat_packed; #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) @@ -143,15 +143,6 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq { #define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) -typedef struct compat_xfs_fsop_setdm_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle information */ - /* ptr to struct fsdmidata */ - compat_uptr_t data; /* DMAPI data */ -} compat_xfs_fsop_setdm_handlereq_t; - -#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ - _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) - #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 95719e161286..28e2d1f37267 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -29,8 +29,8 @@ #include "xfs_reflink.h" -#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ - << mp->m_writeio_log) +#define XFS_ALLOC_ALIGN(mp, off) \ + (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) static int xfs_alert_fsblock_zero( @@ -57,6 +57,7 @@ xfs_bmbt_to_iomap( u16 flags) { struct xfs_mount *mp = ip->i_mount; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock))) return xfs_alert_fsblock_zero(ip, imap); @@ -77,8 +78,8 @@ xfs_bmbt_to_iomap( } iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); - iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); - iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); + iomap->bdev = target->bt_bdev; + iomap->dax_dev = target->bt_daxdev; iomap->flags = flags; if (xfs_ipincount(ip) && @@ -94,18 +95,30 @@ xfs_hole_to_iomap( xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + iomap->addr = IOMAP_NULL_ADDR; iomap->type = IOMAP_HOLE; iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb); iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb); - iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); - iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); + iomap->bdev = target->bt_bdev; + iomap->dax_dev = target->bt_daxdev; } -xfs_extlen_t +static inline xfs_fileoff_t +xfs_iomap_end_fsb( + struct xfs_mount *mp, + loff_t offset, + loff_t count) +{ + ASSERT(offset <= mp->m_super->s_maxbytes); + return min(XFS_B_TO_FSB(mp, offset + count), + XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); +} + +static xfs_extlen_t xfs_eof_alignment( - struct xfs_inode *ip, - xfs_extlen_t extsize) + struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; xfs_extlen_t align = 0; @@ -128,111 +141,80 @@ xfs_eof_alignment( align = 0; } - /* - * Always round up the allocation request to an extent boundary - * (when file on a real-time subvolume or has di_extsize hint). - */ - if (extsize) { - if (align) - align = roundup_64(align, extsize); - else - align = extsize; - } - return align; } -STATIC int +/* + * Check if last_fsb is outside the last extent, and if so grow it to the next + * stripe unit boundary. + */ +xfs_fileoff_t xfs_iomap_eof_align_last_fsb( struct xfs_inode *ip, - xfs_extlen_t extsize, - xfs_fileoff_t *last_fsb) + xfs_fileoff_t end_fsb) { - xfs_extlen_t align = xfs_eof_alignment(ip, extsize); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_extlen_t extsz = xfs_get_extsz_hint(ip); + xfs_extlen_t align = xfs_eof_alignment(ip); + struct xfs_bmbt_irec irec; + struct xfs_iext_cursor icur; + + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + + /* + * Always round up the allocation request to the extent hint boundary. + */ + if (extsz) { + if (align) + align = roundup_64(align, extsz); + else + align = extsz; + } if (align) { - xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align); - int eof, error; + xfs_fileoff_t aligned_end_fsb = roundup_64(end_fsb, align); - error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); - if (error) - return error; - if (eof) - *last_fsb = new_last_fsb; + xfs_iext_last(ifp, &icur); + if (!xfs_iext_get_extent(ifp, &icur, &irec) || + aligned_end_fsb >= irec.br_startoff + irec.br_blockcount) + return aligned_end_fsb; } - return 0; + + return end_fsb; } int xfs_iomap_write_direct( - xfs_inode_t *ip, - xfs_off_t offset, - size_t count, - xfs_bmbt_irec_t *imap, - int nmaps) + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + xfs_fileoff_t count_fsb, + struct xfs_bmbt_irec *imap) { - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t offset_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t count_fsb, resaligned; - xfs_extlen_t extsz; - int nimaps; - int quota_flag; - int rt; - xfs_trans_t *tp; - uint qblocks, resblks, resrtextents; - int error; - int lockmode; - int bmapi_flags = XFS_BMAPI_PREALLOC; - uint tflags = 0; - - rt = XFS_IS_REALTIME_INODE(ip); - extsz = xfs_get_extsz_hint(ip); - lockmode = XFS_ILOCK_SHARED; /* locked by caller */ - - ASSERT(xfs_isilocked(ip, lockmode)); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + xfs_filblks_t resaligned; + int nimaps; + int quota_flag; + uint qblocks, resblks; + unsigned int resrtextents = 0; + int error; + int bmapi_flags = XFS_BMAPI_PREALLOC; + uint tflags = 0; - offset_fsb = XFS_B_TO_FSBT(mp, offset); - last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); - if ((offset + count) > XFS_ISIZE(ip)) { - /* - * Assert that the in-core extent list is present since this can - * call xfs_iread_extents() and we only have the ilock shared. - * This should be safe because the lock was held around a bmapi - * call in the caller and we only need it to access the in-core - * list. - */ - ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & - XFS_IFEXTENTS); - error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb); - if (error) - goto out_unlock; - } else { - if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) - last_fsb = min(last_fsb, (xfs_fileoff_t) - imap->br_blockcount + - imap->br_startoff); - } - count_fsb = last_fsb - offset_fsb; ASSERT(count_fsb > 0); - resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz); - if (unlikely(rt)) { + resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, + xfs_get_extsz_hint(ip)); + if (unlikely(XFS_IS_REALTIME_INODE(ip))) { resrtextents = qblocks = resaligned; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { - resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); quota_flag = XFS_QMOPT_RES_REGBLKS; } - /* - * Drop the shared lock acquired by the caller, attach the dquot if - * necessary and move on to transaction setup. - */ - xfs_iunlock(ip, lockmode); error = xfs_qm_dqattach(ip); if (error) return error; @@ -262,8 +244,7 @@ xfs_iomap_write_direct( if (error) return error; - lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockmode); + xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) @@ -276,8 +257,8 @@ xfs_iomap_write_direct( * caller gave to us. */ nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - bmapi_flags, resblks, imap, &nimaps); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0, + imap, &nimaps); if (error) goto out_res_cancel; @@ -300,7 +281,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: - xfs_iunlock(ip, lockmode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_res_cancel: @@ -409,19 +390,19 @@ xfs_iomap_prealloc_size( if (offset + count <= XFS_ISIZE(ip)) return 0; - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && - (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks))) + if (!(mp->m_flags & XFS_MOUNT_ALLOCSIZE) && + (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))) return 0; /* * If an explicit allocsize is set, the file is small, or we * are writing behind a hole, then use the minimum prealloc: */ - if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || + if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) || XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || !xfs_iext_peek_prev_extent(ifp, icur, &prev) || prev.br_startoff + prev.br_blockcount < offset_fsb) - return mp->m_writeio_blocks; + return mp->m_allocsize_blocks; /* * Determine the initial size of the preallocation. We are beyond the @@ -514,226 +495,13 @@ xfs_iomap_prealloc_size( while (alloc_blocks && alloc_blocks >= freesp) alloc_blocks >>= 4; check_writeio: - if (alloc_blocks < mp->m_writeio_blocks) - alloc_blocks = mp->m_writeio_blocks; + if (alloc_blocks < mp->m_allocsize_blocks) + alloc_blocks = mp->m_allocsize_blocks; trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift, - mp->m_writeio_blocks); + mp->m_allocsize_blocks); return alloc_blocks; } -static int -xfs_file_iomap_begin_delay( - struct inode *inode, - loff_t offset, - loff_t count, - unsigned flags, - struct iomap *iomap) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t maxbytes_fsb = - XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - xfs_fileoff_t end_fsb; - struct xfs_bmbt_irec imap, cmap; - struct xfs_iext_cursor icur, ccur; - xfs_fsblock_t prealloc_blocks = 0; - bool eof = false, cow_eof = false, shared = false; - u16 iomap_flags = 0; - int whichfork = XFS_DATA_FORK; - int error = 0; - - ASSERT(!XFS_IS_REALTIME_INODE(ip)); - ASSERT(!xfs_get_extsz_hint(ip)); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - error = -EFSCORRUPTED; - goto out_unlock; - } - - XFS_STATS_INC(mp, xs_blk_mapw); - - if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - goto out_unlock; - } - - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - - /* - * Search the data fork fork first to look up our source mapping. We - * always need the data fork map, as we have to return it to the - * iomap code so that the higher level write code can read data in to - * perform read-modify-write cycles for unaligned writes. - */ - eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); - if (eof) - imap.br_startoff = end_fsb; /* fake hole until the end */ - - /* We never need to allocate blocks for zeroing a hole. */ - if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { - xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); - goto out_unlock; - } - - /* - * Search the COW fork extent list even if we did not find a data fork - * extent. This serves two purposes: first this implements the - * speculative preallocation using cowextsize, so that we also unshare - * block adjacent to shared blocks instead of just the shared blocks - * themselves. Second the lookup in the extent list is generally faster - * than going out to the shared extent tree. - */ - if (xfs_is_cow_inode(ip)) { - if (!ip->i_cowfp) { - ASSERT(!xfs_is_reflink_inode(ip)); - xfs_ifork_init_cow(ip); - } - cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, - &ccur, &cmap); - if (!cow_eof && cmap.br_startoff <= offset_fsb) { - trace_xfs_reflink_cow_found(ip, &cmap); - whichfork = XFS_COW_FORK; - goto done; - } - } - - if (imap.br_startoff <= offset_fsb) { - /* - * For reflink files we may need a delalloc reservation when - * overwriting shared extents. This includes zeroing of - * existing extents that contain data. - */ - if (!xfs_is_cow_inode(ip) || - ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, - &imap); - goto done; - } - - xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); - - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_inode_need_cow(ip, &imap, &shared); - if (error) - goto out_unlock; - - /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) { - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, - &imap); - goto done; - } - - /* - * Fork all the shared blocks from our write offset until the - * end of the extent. - */ - whichfork = XFS_COW_FORK; - end_fsb = imap.br_startoff + imap.br_blockcount; - } else { - /* - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES - * pages to keep the chunks of work done where somewhat - * symmetric with the work writeback does. This is a completely - * arbitrary number pulled out of thin air. - * - * Note that the values needs to be less than 32-bits wide until - * the lower level functions are updated. - */ - count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - - if (xfs_is_always_cow_inode(ip)) - whichfork = XFS_COW_FORK; - } - - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out_unlock; - - if (eof) { - prealloc_blocks = xfs_iomap_prealloc_size(ip, whichfork, offset, - count, &icur); - if (prealloc_blocks) { - xfs_extlen_t align; - xfs_off_t end_offset; - xfs_fileoff_t p_end_fsb; - - end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); - p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + - prealloc_blocks; - - align = xfs_eof_alignment(ip, 0); - if (align) - p_end_fsb = roundup_64(p_end_fsb, align); - - p_end_fsb = min(p_end_fsb, maxbytes_fsb); - ASSERT(p_end_fsb > offset_fsb); - prealloc_blocks = p_end_fsb - end_fsb; - } - } - -retry: - error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, - end_fsb - offset_fsb, prealloc_blocks, - whichfork == XFS_DATA_FORK ? &imap : &cmap, - whichfork == XFS_DATA_FORK ? &icur : &ccur, - whichfork == XFS_DATA_FORK ? eof : cow_eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ - trace_xfs_delalloc_enospc(ip, offset, count); - if (prealloc_blocks) { - prealloc_blocks = 0; - goto retry; - } - /*FALLTHRU*/ - default: - goto out_unlock; - } - - /* - * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch - * them out if the write happens to fail. - */ - if (whichfork == XFS_DATA_FORK) { - iomap_flags |= IOMAP_F_NEW; - trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap); - } else { - trace_xfs_iomap_alloc(ip, offset, count, whichfork, &cmap); - } -done: - if (whichfork == XFS_COW_FORK) { - if (imap.br_startoff > offset_fsb) { - xfs_trim_extent(&cmap, offset_fsb, - imap.br_startoff - offset_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &cmap, - IOMAP_F_SHARED); - goto out_unlock; - } - /* ensure we only report blocks we have a reservation for */ - xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); - shared = true; - } - if (shared) - iomap_flags |= IOMAP_F_SHARED; - error = xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags); -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; -} - int xfs_iomap_write_unwritten( xfs_inode_t *ip, @@ -771,6 +539,11 @@ xfs_iomap_write_unwritten( */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; + /* Attach dquots so that bmbt splits are accounted correctly. */ + error = xfs_qm_dqattach(ip); + if (error) + return error; + do { /* * Set up a transaction to convert the range of extents @@ -789,6 +562,11 @@ xfs_iomap_write_unwritten( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); + error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto error_on_bmapi_transaction; + /* * Modify the unwritten extent state of the buffer. */ @@ -846,23 +624,42 @@ error_on_bmapi_transaction: static inline bool imap_needs_alloc( struct inode *inode, + unsigned flags, struct xfs_bmbt_irec *imap, int nimaps) { - return !nimaps || - imap->br_startblock == HOLESTARTBLOCK || - imap->br_startblock == DELAYSTARTBLOCK || - (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN); + /* don't allocate blocks when just zeroing */ + if (flags & IOMAP_ZERO) + return false; + if (!nimaps || + imap->br_startblock == HOLESTARTBLOCK || + imap->br_startblock == DELAYSTARTBLOCK) + return true; + /* we convert unwritten extents before copying the data for DAX */ + if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN) + return true; + return false; } static inline bool -needs_cow_for_zeroing( +imap_needs_cow( + struct xfs_inode *ip, + unsigned int flags, struct xfs_bmbt_irec *imap, int nimaps) { - return nimaps && - imap->br_startblock != HOLESTARTBLOCK && - imap->br_state != XFS_EXT_UNWRITTEN; + if (!xfs_is_cow_inode(ip)) + return false; + + /* when zeroing we don't have to COW holes or unwritten extents */ + if (flags & IOMAP_ZERO) { + if (!nimaps || + imap->br_startblock == HOLESTARTBLOCK || + imap->br_state == XFS_EXT_UNWRITTEN) + return false; + } + + return true; } static int @@ -878,15 +675,8 @@ xfs_ilock_for_iomap( * COW writes may allocate delalloc space or convert unwritten COW * extents, so we need to make sure to take the lock exclusively here. */ - if (xfs_is_cow_inode(ip) && is_write) { - /* - * FIXME: It could still overwrite on unshared extents and not - * need allocation. - */ - if (flags & IOMAP_NOWAIT) - return -EAGAIN; + if (xfs_is_cow_inode(ip) && is_write) mode = XFS_ILOCK_EXCL; - } /* * Extents not yet cached requires exclusive access, don't block. This @@ -923,7 +713,7 @@ relock: } static int -xfs_file_iomap_begin( +xfs_direct_write_iomap_begin( struct inode *inode, loff_t offset, loff_t length, @@ -933,103 +723,63 @@ xfs_file_iomap_begin( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - struct xfs_bmbt_irec imap; - xfs_fileoff_t offset_fsb, end_fsb; + struct xfs_bmbt_irec imap, cmap; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); int nimaps = 1, error = 0; bool shared = false; u16 iomap_flags = 0; unsigned lockmode; + ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); + if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && !(flags & IOMAP_DIRECT) && - !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { - /* Reserve delalloc blocks for regular writeback. */ - return xfs_file_iomap_begin_delay(inode, offset, length, flags, - iomap); - } - /* - * Lock the inode in the manner required for the specified operation and - * check for as many conditions that would result in blocking as - * possible. This removes most of the non-blocking checks from the - * mapping code below. + * Writes that span EOF might trigger an IO size update on completion, + * so consider them to be dirty for the purposes of O_DSYNC even if + * there is no other metadata changes pending or have been made here. */ + if (offset + length > i_size_read(inode)) + iomap_flags |= IOMAP_F_DIRTY; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; - ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset > mp->m_super->s_maxbytes - length) - length = mp->m_super->s_maxbytes - offset; - offset_fsb = XFS_B_TO_FSBT(mp, offset); - end_fsb = XFS_B_TO_FSB(mp, offset + length); - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); if (error) goto out_unlock; - if (flags & IOMAP_REPORT) { - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared); - if (error) + if (imap_needs_cow(ip, flags, &imap, nimaps)) { + error = -EAGAIN; + if (flags & IOMAP_NOWAIT) goto out_unlock; - } - - /* Non-modifying mapping requested, so we are done */ - if (!(flags & (IOMAP_WRITE | IOMAP_ZERO))) - goto out_found; - - /* - * Break shared extents if necessary. Checks for non-blocking IO have - * been done up front, so we don't need to do them here. - */ - if (xfs_is_cow_inode(ip)) { - struct xfs_bmbt_irec cmap; - bool directio = (flags & IOMAP_DIRECT); - - /* if zeroing doesn't need COW allocation, then we are done. */ - if ((flags & IOMAP_ZERO) && - !needs_cow_for_zeroing(&imap, nimaps)) - goto out_found; /* may drop and re-acquire the ilock */ - cmap = imap; - error = xfs_reflink_allocate_cow(ip, &cmap, &shared, &lockmode, - directio); + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, + &lockmode, flags & IOMAP_DIRECT); if (error) goto out_unlock; - - /* - * For buffered writes we need to report the address of the - * previous block (if there was any) so that the higher level - * write code can perform read-modify-write operations; we - * won't need the CoW fork mapping until writeback. For direct - * I/O, which must be block aligned, we need to report the - * newly allocated address. If the data fork has a hole, copy - * the COW fork mapping to avoid allocating to the data fork. - */ - if (directio || imap.br_startblock == HOLESTARTBLOCK) - imap = cmap; - + if (shared) + goto out_found_cow; end_fsb = imap.br_startoff + imap.br_blockcount; length = XFS_FSB_TO_B(mp, end_fsb) - offset; } - /* Don't need to allocate over holes when doing zeroing operations. */ - if (flags & IOMAP_ZERO) - goto out_found; + if (imap_needs_alloc(inode, flags, &imap, nimaps)) + goto allocate_blocks; - if (!imap_needs_alloc(inode, &imap, nimaps)) - goto out_found; + xfs_iunlock(ip, lockmode); + trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags); - /* If nowait is set bail since we are going to make allocations. */ - if (flags & IOMAP_NOWAIT) { - error = -EAGAIN; +allocate_blocks: + error = -EAGAIN; + if (flags & IOMAP_NOWAIT) goto out_unlock; - } /* * We cap the maximum length we map to a sane size to keep the chunks @@ -1041,57 +791,273 @@ xfs_file_iomap_begin( * lower level functions are updated. */ length = min_t(loff_t, length, 1024 * PAGE_SIZE); + end_fsb = xfs_iomap_end_fsb(mp, offset, length); - /* - * xfs_iomap_write_direct() expects the shared lock. It is unlocked on - * return. - */ - if (lockmode == XFS_ILOCK_EXCL) - xfs_ilock_demote(ip, lockmode); - error = xfs_iomap_write_direct(ip, offset, length, &imap, - nimaps); + if (offset + length > XFS_ISIZE(ip)) + end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb); + else if (nimaps && imap.br_startblock == HOLESTARTBLOCK) + end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount); + xfs_iunlock(ip, lockmode); + + error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, + &imap); if (error) return error; - iomap_flags |= IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW); + +out_found_cow: + xfs_iunlock(ip, lockmode); + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); + if (imap.br_startblock != HOLESTARTBLOCK) { + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0); + if (error) + return error; + } + return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); + +out_unlock: + xfs_iunlock(ip, lockmode); + return error; +} + +const struct iomap_ops xfs_direct_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, +}; + +static int +xfs_buffered_write_iomap_begin( + struct inode *inode, + loff_t offset, + loff_t count, + unsigned flags, + struct iomap *iomap, + struct iomap *srcmap) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count); + struct xfs_bmbt_irec imap, cmap; + struct xfs_iext_cursor icur, ccur; + xfs_fsblock_t prealloc_blocks = 0; + bool eof = false, cow_eof = false, shared = false; + int allocfork = XFS_DATA_FORK; + int error = 0; + + /* we can't use delayed allocations when using extent size hints */ + if (xfs_get_extsz_hint(ip)) + return xfs_direct_write_iomap_begin(inode, offset, count, + flags, iomap, srcmap); + + ASSERT(!XFS_IS_REALTIME_INODE(ip)); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, XFS_DATA_FORK)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + error = -EFSCORRUPTED; + goto out_unlock; + } + + XFS_STATS_INC(mp, xs_blk_mapw); + + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + goto out_unlock; + } -out_finish: /* - * Writes that span EOF might trigger an IO size update on completion, - * so consider them to be dirty for the purposes of O_DSYNC even if - * there is no other metadata changes pending or have been made here. + * Search the data fork fork first to look up our source mapping. We + * always need the data fork map, as we have to return it to the + * iomap code so that the higher level write code can read data in to + * perform read-modify-write cycles for unaligned writes. */ - if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode)) - iomap_flags |= IOMAP_F_DIRTY; - if (shared) - iomap_flags |= IOMAP_F_SHARED; - return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags); + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); + if (eof) + imap.br_startoff = end_fsb; /* fake hole until the end */ -out_found: - ASSERT(nimaps); - xfs_iunlock(ip, lockmode); - trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - goto out_finish; + /* We never need to allocate blocks for zeroing a hole. */ + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); + goto out_unlock; + } + + /* + * Search the COW fork extent list even if we did not find a data fork + * extent. This serves two purposes: first this implements the + * speculative preallocation using cowextsize, so that we also unshare + * block adjacent to shared blocks instead of just the shared blocks + * themselves. Second the lookup in the extent list is generally faster + * than going out to the shared extent tree. + */ + if (xfs_is_cow_inode(ip)) { + if (!ip->i_cowfp) { + ASSERT(!xfs_is_reflink_inode(ip)); + xfs_ifork_init_cow(ip); + } + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, + &ccur, &cmap); + if (!cow_eof && cmap.br_startoff <= offset_fsb) { + trace_xfs_reflink_cow_found(ip, &cmap); + goto found_cow; + } + } + + if (imap.br_startoff <= offset_fsb) { + /* + * For reflink files we may need a delalloc reservation when + * overwriting shared extents. This includes zeroing of + * existing extents that contain data. + */ + if (!xfs_is_cow_inode(ip) || + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, + &imap); + goto found_imap; + } + + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); + + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_inode_need_cow(ip, &imap, &shared); + if (error) + goto out_unlock; + + /* Not shared? Just report the (potentially capped) extent. */ + if (!shared) { + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, + &imap); + goto found_imap; + } + + /* + * Fork all the shared blocks from our write offset until the + * end of the extent. + */ + allocfork = XFS_COW_FORK; + end_fsb = imap.br_startoff + imap.br_blockcount; + } else { + /* + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES + * pages to keep the chunks of work done where somewhat + * symmetric with the work writeback does. This is a completely + * arbitrary number pulled out of thin air. + * + * Note that the values needs to be less than 32-bits wide until + * the lower level functions are updated. + */ + count = min_t(loff_t, count, 1024 * PAGE_SIZE); + end_fsb = xfs_iomap_end_fsb(mp, offset, count); + + if (xfs_is_always_cow_inode(ip)) + allocfork = XFS_COW_FORK; + } + + error = xfs_qm_dqattach_locked(ip, false); + if (error) + goto out_unlock; + + if (eof) { + prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset, + count, &icur); + if (prealloc_blocks) { + xfs_extlen_t align; + xfs_off_t end_offset; + xfs_fileoff_t p_end_fsb; + + end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1); + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; + + align = xfs_eof_alignment(ip); + if (align) + p_end_fsb = roundup_64(p_end_fsb, align); + + p_end_fsb = min(p_end_fsb, + XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); + ASSERT(p_end_fsb > offset_fsb); + prealloc_blocks = p_end_fsb - end_fsb; + } + } + +retry: + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, + allocfork == XFS_DATA_FORK ? &imap : &cmap, + allocfork == XFS_DATA_FORK ? &icur : &ccur, + allocfork == XFS_DATA_FORK ? eof : cow_eof); + switch (error) { + case 0: + break; + case -ENOSPC: + case -EDQUOT: + /* retry without any preallocation */ + trace_xfs_delalloc_enospc(ip, offset, count); + if (prealloc_blocks) { + prealloc_blocks = 0; + goto retry; + } + /*FALLTHRU*/ + default: + goto out_unlock; + } + + if (allocfork == XFS_COW_FORK) { + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap); + goto found_cow; + } + + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW); + +found_imap: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &imap, 0); + +found_cow: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (imap.br_startoff <= offset_fsb) { + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0); + if (error) + return error; + } else { + xfs_trim_extent(&cmap, offset_fsb, + imap.br_startoff - offset_fsb); + } + return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); out_unlock: - xfs_iunlock(ip, lockmode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } static int -xfs_file_iomap_end_delalloc( - struct xfs_inode *ip, +xfs_buffered_write_iomap_end( + struct inode *inode, loff_t offset, loff_t length, ssize_t written, + unsigned flags, struct iomap *iomap) { + struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; + if (iomap->type != IOMAP_DELALLOC) + return 0; + /* * Behave as if the write failed if drop writes is enabled. Set the NEW * flag to force delalloc cleanup. @@ -1136,24 +1102,51 @@ xfs_file_iomap_end_delalloc( return 0; } +const struct iomap_ops xfs_buffered_write_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, + .iomap_end = xfs_buffered_write_iomap_end, +}; + static int -xfs_file_iomap_end( +xfs_read_iomap_begin( struct inode *inode, loff_t offset, loff_t length, - ssize_t written, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct iomap *srcmap) { - if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) - return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written, iomap); - return 0; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec imap; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); + int nimaps = 1, error = 0; + bool shared = false; + unsigned lockmode; + + ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + error = xfs_ilock_for_iomap(ip, flags, &lockmode); + if (error) + return error; + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, 0); + if (!error && (flags & IOMAP_REPORT)) + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + xfs_iunlock(ip, lockmode); + + if (error) + return error; + trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0); } -const struct iomap_ops xfs_iomap_ops = { - .iomap_begin = xfs_file_iomap_begin, - .iomap_end = xfs_file_iomap_end, +const struct iomap_ops xfs_read_iomap_ops = { + .iomap_begin = xfs_read_iomap_begin, }; static int @@ -1196,8 +1189,7 @@ xfs_seek_iomap_begin( /* * Fake a hole until the end of the file. */ - data_fsb = min(XFS_B_TO_FSB(mp, offset + length), - XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); + data_fsb = xfs_iomap_end_fsb(mp, offset, length); } /* diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 71d0ae460c44..7d3703556d0e 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -11,13 +11,14 @@ struct xfs_inode; struct xfs_bmbt_irec; -int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, - struct xfs_bmbt_irec *, int); +int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, + xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); +xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip, + xfs_fileoff_t end_fsb); int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *, u16); -xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); static inline xfs_filblks_t xfs_aligned_fsb_count( @@ -39,7 +40,9 @@ xfs_aligned_fsb_count( return count_fsb; } -extern const struct iomap_ops xfs_iomap_ops; +extern const struct iomap_ops xfs_buffered_write_iomap_ops; +extern const struct iomap_ops xfs_direct_write_iomap_ops; +extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index fe285d123d69..8afe69ca188b 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -20,6 +20,7 @@ #include "xfs_symlink.h" #include "xfs_dir2.h" #include "xfs_iomap.h" +#include "xfs_error.h" #include <linux/xattr.h> #include <linux/posix_acl.h> @@ -470,20 +471,57 @@ xfs_vn_get_link_inline( struct inode *inode, struct delayed_call *done) { + struct xfs_inode *ip = XFS_I(inode); char *link; - ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); + ASSERT(ip->i_df.if_flags & XFS_IFINLINE); /* * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if * if_data is junk. */ - link = XFS_I(inode)->i_df.if_u1.if_data; - if (!link) + link = ip->i_df.if_u1.if_data; + if (XFS_IS_CORRUPT(ip->i_mount, !link)) return ERR_PTR(-EFSCORRUPTED); return link; } +static uint32_t +xfs_stat_blksize( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + /* + * If the file blocks are being allocated from a realtime volume, then + * always return the realtime extent size. + */ + if (XFS_IS_REALTIME_INODE(ip)) + return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + + /* + * Allow large block sizes to be reported to userspace programs if the + * "largeio" mount option is used. + * + * If compatibility mode is specified, simply return the basic unit of + * caching so that we don't get inefficient read/modify/write I/O from + * user apps. Otherwise.... + * + * If the underlying volume is a stripe, then return the stripe width in + * bytes as the recommended I/O size. It is not a stripe and we've set a + * default buffered I/O size, return that, otherwise return the compat + * default. + */ + if (mp->m_flags & XFS_MOUNT_LARGEIO) { + if (mp->m_swidth) + return mp->m_swidth << mp->m_sb.sb_blocklog; + if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) + return 1U << mp->m_allocsize_log; + } + + return PAGE_SIZE; +} + STATIC int xfs_vn_getattr( const struct path *path, @@ -516,8 +554,7 @@ xfs_vn_getattr( if (ip->i_d.di_version == 3) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; - stat->btime.tv_sec = ip->i_d.di_crtime.t_sec; - stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec; + stat->btime = ip->i_d.di_crtime; } } @@ -543,16 +580,7 @@ xfs_vn_getattr( stat->rdev = inode->i_rdev; break; default: - if (XFS_IS_REALTIME_INODE(ip)) { - /* - * If the file blocks are being allocated from a - * realtime volume, then return the inode's realtime - * extent size or the realtime volume's extent size. - */ - stat->blksize = - xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; - } else - stat->blksize = xfs_preferred_iosize(mp); + stat->blksize = xfs_stat_blksize(ip); stat->rdev = 0; break; } @@ -664,7 +692,7 @@ xfs_setattr_nonsize( ASSERT(gdqp == NULL); error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), xfs_kgid_to_gid(gid), - xfs_get_projid(ip), + ip->i_d.di_projid, qflags, &udqp, &gdqp, NULL); if (error) return error; @@ -883,10 +911,10 @@ xfs_setattr_size( if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_iomap_ops); + &did_zeroing, &xfs_buffered_write_iomap_ops); } else { error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_iomap_ops); + &xfs_buffered_write_iomap_ops); } if (error) @@ -1114,7 +1142,7 @@ xfs_vn_fiemap( &xfs_xattr_iomap_ops); } else { error = iomap_fiemap(inode, fieinfo, start, length, - &xfs_iomap_ops); + &xfs_read_iomap_ops); } xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); @@ -1227,7 +1255,7 @@ xfs_inode_supports_dax( return false; /* Device has to support DAX too. */ - return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL; + return xfs_inode_buftarg(ip)->bt_daxdev != NULL; } STATIC void @@ -1290,9 +1318,7 @@ xfs_setup_inode( lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_dir_key); lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); - ip->d_ops = ip->i_mount->m_dir_inode_ops; } else { - ip->d_ops = ip->i_mount->m_nondir_inode_ops; lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 884950adbd16..4b31c29b7e6b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -84,7 +84,7 @@ xfs_bulkstat_one_int( /* xfs_iget returns the following without needing * further change. */ - buf->bs_projectid = xfs_get_projid(ip); + buf->bs_projectid = ip->i_d.di_projid; buf->bs_ino = ino; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; @@ -97,8 +97,8 @@ xfs_bulkstat_one_int( buf->bs_mtime_nsec = inode->i_mtime.tv_nsec; buf->bs_ctime = inode->i_ctime.tv_sec; buf->bs_ctime_nsec = inode->i_ctime.tv_nsec; - buf->bs_btime = dic->di_crtime.t_sec; - buf->bs_btime_nsec = dic->di_crtime.t_nsec; + buf->bs_btime = dic->di_crtime.tv_sec; + buf->bs_btime_nsec = dic->di_crtime.tv_nsec; buf->bs_gen = inode->i_generation; buf->bs_mode = inode->i_mode; diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index aa375cf53021..233dcc8784db 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -298,7 +298,8 @@ xfs_iwalk_ag_start( error = xfs_inobt_get_rec(*curpp, irec, has_more); if (error) return error; - XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1); + if (XFS_IS_CORRUPT(mp, *has_more != 1)) + return -EFSCORRUPTED; /* * If the LE lookup yielded an inobt record before the cursor position, diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index ca15105681ca..8738bb03f253 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -223,26 +223,32 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, char *data, unsigned int op); #define ASSERT_ALWAYS(expr) \ - (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) #ifdef DEBUG #define ASSERT(expr) \ - (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) #else /* !DEBUG */ #ifdef XFS_WARN #define ASSERT(expr) \ - (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : asswarn(NULL, #expr, __FILE__, __LINE__)) #else /* !DEBUG && !XFS_WARN */ -#define ASSERT(expr) ((void)0) +#define ASSERT(expr) ((void)0) #endif /* XFS_WARN */ #endif /* DEBUG */ +#define XFS_IS_CORRUPT(mp, expr) \ + (unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \ + NULL, 0, __FILE__, __LINE__, \ + __this_address), \ + true : false) + #define STATIC static noinline #ifdef CONFIG_XFS_RT diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 641d07f30a27..6a147c63a8a6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -57,10 +57,6 @@ xlog_state_get_iclog_space( struct xlog_ticket *ticket, int *continued_write, int *logoffsetp); -STATIC int -xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog); STATIC void xlog_state_switch_iclogs( struct xlog *log, @@ -83,7 +79,10 @@ STATIC void xlog_ungrant_log_space( struct xlog *log, struct xlog_ticket *ticket); - +STATIC void +xlog_sync( + struct xlog *log, + struct xlog_in_core *iclog); #if defined(DEBUG) STATIC void xlog_verify_dest_ptr( @@ -552,16 +551,71 @@ xfs_log_done( return lsn; } +static bool +__xlog_state_release_iclog( + struct xlog *log, + struct xlog_in_core *iclog) +{ + lockdep_assert_held(&log->l_icloglock); + + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { + /* update tail before writing to iclog */ + xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); + + iclog->ic_state = XLOG_STATE_SYNCING; + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog, tail_lsn); + /* cycle incremented when incrementing curr_block */ + return true; + } + + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + return false; +} + +/* + * Flush iclog to disk if this is the last reference to the given iclog and the + * it is in the WANT_SYNC state. + */ +static int +xlog_state_release_iclog( + struct xlog *log, + struct xlog_in_core *iclog) +{ + lockdep_assert_held(&log->l_icloglock); + + if (iclog->ic_state == XLOG_STATE_IOERROR) + return -EIO; + + if (atomic_dec_and_test(&iclog->ic_refcnt) && + __xlog_state_release_iclog(log, iclog)) { + spin_unlock(&log->l_icloglock); + xlog_sync(log, iclog); + spin_lock(&log->l_icloglock); + } + + return 0; +} + int xfs_log_release_iclog( - struct xfs_mount *mp, + struct xfs_mount *mp, struct xlog_in_core *iclog) { - if (xlog_state_release_iclog(mp->m_log, iclog)) { + struct xlog *log = mp->m_log; + bool sync; + + if (iclog->ic_state == XLOG_STATE_IOERROR) { xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); return -EIO; } + if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) { + sync = __xlog_state_release_iclog(log, iclog); + spin_unlock(&log->l_icloglock); + if (sync) + xlog_sync(log, iclog); + } return 0; } @@ -866,10 +920,7 @@ out_err: iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); xlog_state_want_sync(log, iclog); - spin_unlock(&log->l_icloglock); error = xlog_state_release_iclog(log, iclog); - - spin_lock(&log->l_icloglock); switch (iclog->ic_state) { default: if (!XLOG_FORCED_SHUTDOWN(log)) { @@ -924,8 +975,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) #ifdef DEBUG first_iclog = iclog = log->l_iclog; do { - if (!(iclog->ic_state & XLOG_STATE_IOERROR)) { - ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE); + if (iclog->ic_state != XLOG_STATE_IOERROR) { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); ASSERT(iclog->ic_offset == 0); } iclog = iclog->ic_next; @@ -950,21 +1001,17 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_lock(&log->l_icloglock); iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); - xlog_state_want_sync(log, iclog); - spin_unlock(&log->l_icloglock); error = xlog_state_release_iclog(log, iclog); - - spin_lock(&log->l_icloglock); - - if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE - || iclog->ic_state == XLOG_STATE_DIRTY - || iclog->ic_state == XLOG_STATE_IOERROR) ) { - - xlog_wait(&iclog->ic_force_wait, - &log->l_icloglock); - } else { + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: + case XLOG_STATE_DIRTY: + case XLOG_STATE_IOERROR: spin_unlock(&log->l_icloglock); + break; + default: + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + break; } } @@ -1254,7 +1301,7 @@ xlog_ioend_work( * didn't succeed. */ aborted = true; - } else if (iclog->ic_state & XLOG_STATE_IOERROR) { + } else if (iclog->ic_state == XLOG_STATE_IOERROR) { aborted = true; } @@ -1479,7 +1526,7 @@ xlog_alloc_log( log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0, - mp->m_fsname); + mp->m_super->s_id); if (!log->l_ioend_workqueue) goto out_free_iclog; @@ -1727,7 +1774,7 @@ xlog_write_iclog( * across the log IO to archieve that. */ down(&iclog->ic_sema); - if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) { + if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) { /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of @@ -1735,13 +1782,11 @@ xlog_write_iclog( * the buffer manually, the code needs to be kept in sync * with the I/O completion path. */ - xlog_state_done_syncing(iclog, XFS_LI_ABORTED); + xlog_state_done_syncing(iclog, true); up(&iclog->ic_sema); return; } - iclog->ic_io_size = count; - bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; @@ -1751,9 +1796,9 @@ xlog_write_iclog( if (need_flush) iclog->ic_bio.bi_opf |= REQ_PREFLUSH; - xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size); + xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count); if (is_vmalloc_addr(iclog->ic_data)) - flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size); + flush_kernel_vmap_range(iclog->ic_data, count); /* * If this log buffer would straddle the end of the log we will have @@ -1969,7 +2014,6 @@ xlog_dealloc_log( /* * Update counters atomically now that memcpy is done. */ -/* ARGSUSED */ static inline void xlog_state_finish_copy( struct xlog *log, @@ -1977,16 +2021,11 @@ xlog_state_finish_copy( int record_cnt, int copy_bytes) { - spin_lock(&log->l_icloglock); + lockdep_assert_held(&log->l_icloglock); be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt); iclog->ic_offset += copy_bytes; - - spin_unlock(&log->l_icloglock); -} /* xlog_state_finish_copy */ - - - +} /* * print out info relating to regions written which consume @@ -2263,15 +2302,18 @@ xlog_write_copy_finish( int log_offset, struct xlog_in_core **commit_iclog) { + int error; + if (*partial_copy) { /* * This iclog has already been marked WANT_SYNC by * xlog_state_get_iclog_space. */ + spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); *record_cnt = 0; *data_cnt = 0; - return xlog_state_release_iclog(log, iclog); + goto release_iclog; } *partial_copy = 0; @@ -2279,21 +2321,25 @@ xlog_write_copy_finish( if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { /* no more space in this iclog - push it. */ + spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); *record_cnt = 0; *data_cnt = 0; - spin_lock(&log->l_icloglock); xlog_state_want_sync(log, iclog); - spin_unlock(&log->l_icloglock); - if (!commit_iclog) - return xlog_state_release_iclog(log, iclog); + goto release_iclog; + spin_unlock(&log->l_icloglock); ASSERT(flags & XLOG_COMMIT_TRANS); *commit_iclog = iclog; } return 0; + +release_iclog: + error = xlog_state_release_iclog(log, iclog); + spin_unlock(&log->l_icloglock); + return error; } /* @@ -2355,7 +2401,7 @@ xlog_write( int contwr = 0; int record_cnt = 0; int data_cnt = 0; - int error; + int error = 0; *start_lsn = 0; @@ -2506,13 +2552,17 @@ next_lv: ASSERT(len == 0); + spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); - if (!commit_iclog) - return xlog_state_release_iclog(log, iclog); + if (commit_iclog) { + ASSERT(flags & XLOG_COMMIT_TRANS); + *commit_iclog = iclog; + } else { + error = xlog_state_release_iclog(log, iclog); + } + spin_unlock(&log->l_icloglock); - ASSERT(flags & XLOG_COMMIT_TRANS); - *commit_iclog = iclog; - return 0; + return error; } @@ -2548,7 +2598,7 @@ xlog_state_clean_iclog( int changed = 0; /* Prepare the completed iclog. */ - if (!(dirty_iclog->ic_state & XLOG_STATE_IOERROR)) + if (dirty_iclog->ic_state != XLOG_STATE_IOERROR) dirty_iclog->ic_state = XLOG_STATE_DIRTY; /* Walk all the iclogs to update the ordered active state. */ @@ -2639,7 +2689,8 @@ xlog_get_lowest_lsn( xfs_lsn_t lowest_lsn = 0, lsn; do { - if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) + if (iclog->ic_state == XLOG_STATE_ACTIVE || + iclog->ic_state == XLOG_STATE_DIRTY) continue; lsn = be64_to_cpu(iclog->ic_header.h_lsn); @@ -2699,61 +2750,48 @@ static bool xlog_state_iodone_process_iclog( struct xlog *log, struct xlog_in_core *iclog, - struct xlog_in_core *completed_iclog, bool *ioerror) { xfs_lsn_t lowest_lsn; xfs_lsn_t header_lsn; - /* Skip all iclogs in the ACTIVE & DIRTY states */ - if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: + case XLOG_STATE_DIRTY: + /* + * Skip all iclogs in the ACTIVE & DIRTY states: + */ return false; - - /* - * Between marking a filesystem SHUTDOWN and stopping the log, we do - * flush all iclogs to disk (if there wasn't a log I/O error). So, we do - * want things to go smoothly in case of just a SHUTDOWN w/o a - * LOG_IO_ERROR. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) { + case XLOG_STATE_IOERROR: + /* + * Between marking a filesystem SHUTDOWN and stopping the log, + * we do flush all iclogs to disk (if there wasn't a log I/O + * error). So, we do want things to go smoothly in case of just + * a SHUTDOWN w/o a LOG_IO_ERROR. + */ *ioerror = true; return false; - } - - /* - * Can only perform callbacks in order. Since this iclog is not in the - * DONE_SYNC/ DO_CALLBACK state, we skip the rest and just try to clean - * up. If we set our iclog to DO_CALLBACK, we will not process it when - * we retry since a previous iclog is in the CALLBACK and the state - * cannot change since we are holding the l_icloglock. - */ - if (!(iclog->ic_state & - (XLOG_STATE_DONE_SYNC | XLOG_STATE_DO_CALLBACK))) { - if (completed_iclog && - (completed_iclog->ic_state == XLOG_STATE_DONE_SYNC)) { - completed_iclog->ic_state = XLOG_STATE_DO_CALLBACK; - } + case XLOG_STATE_DONE_SYNC: + /* + * Now that we have an iclog that is in the DONE_SYNC state, do + * one more check here to see if we have chased our tail around. + * If this is not the lowest lsn iclog, then we will leave it + * for another completion to process. + */ + header_lsn = be64_to_cpu(iclog->ic_header.h_lsn); + lowest_lsn = xlog_get_lowest_lsn(log); + if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0) + return false; + xlog_state_set_callback(log, iclog, header_lsn); + return false; + default: + /* + * Can only perform callbacks in order. Since this iclog is not + * in the DONE_SYNC state, we skip the rest and just try to + * clean up. + */ return true; } - - /* - * We now have an iclog that is in either the DO_CALLBACK or DONE_SYNC - * states. The other states (WANT_SYNC, SYNCING, or CALLBACK were caught - * by the above if and are going to clean (i.e. we aren't doing their - * callbacks) see the above if. - * - * We will do one more check here to see if we have chased our tail - * around. If this is not the lowest lsn iclog, then we will leave it - * for another completion to process. - */ - header_lsn = be64_to_cpu(iclog->ic_header.h_lsn); - lowest_lsn = xlog_get_lowest_lsn(log); - if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0) - return false; - - xlog_state_set_callback(log, iclog, header_lsn); - return false; - } /* @@ -2770,6 +2808,8 @@ xlog_state_do_iclog_callbacks( struct xlog *log, struct xlog_in_core *iclog, bool aborted) + __releases(&log->l_icloglock) + __acquires(&log->l_icloglock) { spin_unlock(&log->l_icloglock); spin_lock(&iclog->ic_callback_lock); @@ -2792,57 +2832,13 @@ xlog_state_do_iclog_callbacks( spin_unlock(&iclog->ic_callback_lock); } -#ifdef DEBUG -/* - * Make one last gasp attempt to see if iclogs are being left in limbo. If the - * above loop finds an iclog earlier than the current iclog and in one of the - * syncing states, the current iclog is put into DO_CALLBACK and the callbacks - * are deferred to the completion of the earlier iclog. Walk the iclogs in order - * and make sure that no iclog is in DO_CALLBACK unless an earlier iclog is in - * one of the syncing states. - * - * Note that SYNCING|IOERROR is a valid state so we cannot just check for - * ic_state == SYNCING. - */ -static void -xlog_state_callback_check_state( - struct xlog *log) -{ - struct xlog_in_core *first_iclog = log->l_iclog; - struct xlog_in_core *iclog = first_iclog; - - do { - ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK); - /* - * Terminate the loop if iclogs are found in states - * which will cause other threads to clean up iclogs. - * - * SYNCING - i/o completion will go through logs - * DONE_SYNC - interrupt thread should be waiting for - * l_icloglock - * IOERROR - give up hope all ye who enter here - */ - if (iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state & XLOG_STATE_SYNCING || - iclog->ic_state == XLOG_STATE_DONE_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR ) - break; - iclog = iclog->ic_next; - } while (first_iclog != iclog); -} -#else -#define xlog_state_callback_check_state(l) ((void)0) -#endif - STATIC void xlog_state_do_callback( struct xlog *log, - bool aborted, - struct xlog_in_core *ciclog) + bool aborted) { struct xlog_in_core *iclog; struct xlog_in_core *first_iclog; - bool did_callbacks = false; bool cycled_icloglock; bool ioerror; int flushcnt = 0; @@ -2866,11 +2862,11 @@ xlog_state_do_callback( do { if (xlog_state_iodone_process_iclog(log, iclog, - ciclog, &ioerror)) + &ioerror)) break; - if (!(iclog->ic_state & - (XLOG_STATE_CALLBACK | XLOG_STATE_IOERROR))) { + if (iclog->ic_state != XLOG_STATE_CALLBACK && + iclog->ic_state != XLOG_STATE_IOERROR) { iclog = iclog->ic_next; continue; } @@ -2886,8 +2882,6 @@ xlog_state_do_callback( iclog = iclog->ic_next; } while (first_iclog != iclog); - did_callbacks |= cycled_icloglock; - if (repeats > 5000) { flushcnt += repeats; repeats = 0; @@ -2897,10 +2891,8 @@ xlog_state_do_callback( } } while (!ioerror && cycled_icloglock); - if (did_callbacks) - xlog_state_callback_check_state(log); - - if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) + if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE || + log->l_iclog->ic_state == XLOG_STATE_IOERROR) wake_up_all(&log->l_flush_wait); spin_unlock(&log->l_icloglock); @@ -2929,8 +2921,6 @@ xlog_state_done_syncing( spin_lock(&log->l_icloglock); - ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || - iclog->ic_state == XLOG_STATE_IOERROR); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* @@ -2939,8 +2929,10 @@ xlog_state_done_syncing( * and none should ever be attempted to be written to disk * again. */ - if (iclog->ic_state != XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_SYNCING) iclog->ic_state = XLOG_STATE_DONE_SYNC; + else + ASSERT(iclog->ic_state == XLOG_STATE_IOERROR); /* * Someone could be sleeping prior to writing out the next @@ -2949,7 +2941,7 @@ xlog_state_done_syncing( */ wake_up_all(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); - xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ + xlog_state_do_callback(log, aborted); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2983,7 +2975,6 @@ xlog_state_get_iclog_space( int log_offset; xlog_rec_header_t *head; xlog_in_core_t *iclog; - int error; restart: spin_lock(&log->l_icloglock); @@ -3032,24 +3023,22 @@ restart: * can fit into remaining data section. */ if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { + int error = 0; + xlog_state_switch_iclogs(log, iclog, iclog->ic_size); /* - * If I'm the only one writing to this iclog, sync it to disk. - * We need to do an atomic compare and decrement here to avoid - * racing with concurrent atomic_dec_and_lock() calls in + * If we are the only one writing to this iclog, sync it to + * disk. We need to do an atomic compare and decrement here to + * avoid racing with concurrent atomic_dec_and_lock() calls in * xlog_state_release_iclog() when there is more than one * reference to the iclog. */ - if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) { - /* we are the only one */ - spin_unlock(&log->l_icloglock); + if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) error = xlog_state_release_iclog(log, iclog); - if (error) - return error; - } else { - spin_unlock(&log->l_icloglock); - } + spin_unlock(&log->l_icloglock); + if (error) + return error; goto restart; } @@ -3161,60 +3150,6 @@ xlog_ungrant_log_space( } /* - * Flush iclog to disk if this is the last reference to the given iclog and - * the WANT_SYNC bit is set. - * - * When this function is entered, the iclog is not necessarily in the - * WANT_SYNC state. It may be sitting around waiting to get filled. - * - * - */ -STATIC int -xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog) -{ - int sync = 0; /* do we sync? */ - - if (iclog->ic_state & XLOG_STATE_IOERROR) - return -EIO; - - ASSERT(atomic_read(&iclog->ic_refcnt) > 0); - if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) - return 0; - - if (iclog->ic_state & XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - return -EIO; - } - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || - iclog->ic_state == XLOG_STATE_WANT_SYNC); - - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { - /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); - sync++; - iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); - /* cycle incremented when incrementing curr_block */ - } - spin_unlock(&log->l_icloglock); - - /* - * We let the log lock go, so it's possible that we hit a log I/O - * error or some other SHUTDOWN condition that marks the iclog - * as XLOG_STATE_IOERROR before the bwrite. However, we know that - * this iclog has consistent data, so we ignore IOERROR - * flags after this point. - */ - if (sync) - xlog_sync(log, iclog); - return 0; -} /* xlog_state_release_iclog */ - - -/* * This routine will mark the current iclog in the ring as WANT_SYNC * and move the current iclog pointer to the next iclog in the ring. * When this routine is called from xlog_state_get_iclog_space(), the @@ -3307,7 +3242,7 @@ xfs_log_force( spin_lock(&log->l_icloglock); iclog = log->l_iclog; - if (iclog->ic_state & XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_IOERROR) goto out_error; if (iclog->ic_state == XLOG_STATE_DIRTY || @@ -3337,12 +3272,9 @@ xfs_log_force( atomic_inc(&iclog->ic_refcnt); lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); - spin_unlock(&log->l_icloglock); - if (xlog_state_release_iclog(log, iclog)) - return -EIO; + goto out_error; - spin_lock(&log->l_icloglock); if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn || iclog->ic_state == XLOG_STATE_DIRTY) goto out_unlock; @@ -3367,11 +3299,11 @@ xfs_log_force( if (!(flags & XFS_LOG_SYNC)) goto out_unlock; - if (iclog->ic_state & XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_IOERROR) goto out_error; XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - if (iclog->ic_state & XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; return 0; @@ -3396,7 +3328,7 @@ __xfs_log_force_lsn( spin_lock(&log->l_icloglock); iclog = log->l_iclog; - if (iclog->ic_state & XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_IOERROR) goto out_error; while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { @@ -3425,10 +3357,8 @@ __xfs_log_force_lsn( * will go out then. */ if (!already_slept && - (iclog->ic_prev->ic_state & - (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { - ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - + (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC || + iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) { XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_prev->ic_write_wait, @@ -3437,24 +3367,23 @@ __xfs_log_force_lsn( } atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); - spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) - return -EIO; + goto out_error; if (log_flushed) *log_flushed = 1; - spin_lock(&log->l_icloglock); } if (!(flags & XFS_LOG_SYNC) || - (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) + (iclog->ic_state == XLOG_STATE_ACTIVE || + iclog->ic_state == XLOG_STATE_DIRTY)) goto out_unlock; - if (iclog->ic_state & XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_IOERROR) goto out_error; XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); - if (iclog->ic_state & XLOG_STATE_IOERROR) + if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; return 0; @@ -3517,8 +3446,8 @@ xlog_state_want_sync( if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); } else { - ASSERT(iclog->ic_state & - (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || + iclog->ic_state == XLOG_STATE_IOERROR); } } @@ -3539,7 +3468,7 @@ xfs_log_ticket_put( { ASSERT(atomic_read(&ticket->t_ref) > 0); if (atomic_dec_and_test(&ticket->t_ref)) - kmem_zone_free(xfs_log_ticket_zone, ticket); + kmem_cache_free(xfs_log_ticket_zone, ticket); } xlog_ticket_t * @@ -3895,7 +3824,7 @@ xlog_state_ioerror( xlog_in_core_t *iclog, *ic; iclog = log->l_iclog; - if (! (iclog->ic_state & XLOG_STATE_IOERROR)) { + if (iclog->ic_state != XLOG_STATE_IOERROR) { /* * Mark all the incore logs IOERROR. * From now on, no log flushes will result. @@ -3955,7 +3884,7 @@ xfs_log_force_umount( * Somebody could've already done the hard work for us. * No need to get locks for this. */ - if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) { + if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) { ASSERT(XLOG_FORCED_SHUTDOWN(log)); return 1; } @@ -4006,21 +3935,8 @@ xfs_log_force_umount( spin_lock(&log->l_cilp->xc_push_lock); wake_up_all(&log->l_cilp->xc_commit_wait); spin_unlock(&log->l_cilp->xc_push_lock); - xlog_state_do_callback(log, true, NULL); - -#ifdef XFSERRORDEBUG - { - xlog_in_core_t *iclog; + xlog_state_do_callback(log, true); - spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - do { - ASSERT(iclog->ic_callback == 0); - iclog = iclog->ic_next; - } while (iclog != log->l_iclog); - spin_unlock(&log->l_icloglock); - } -#endif /* return non-zero if log IOERROR transition had already happened */ return retval; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ef652abd112c..48435cf2aa16 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -179,7 +179,7 @@ xlog_cil_alloc_shadow_bufs( /* * We free and allocate here as a realloc would copy - * unecessary data. We don't use kmem_zalloc() for the + * unnecessary data. We don't use kmem_zalloc() for the * same reason - we don't need to zero the data area in * the buffer, only the log vector header and the iovec * storage. @@ -682,7 +682,7 @@ xlog_cil_push( } - /* check for a previously pushed seqeunce */ + /* check for a previously pushed sequence */ if (push_seq < cil->xc_ctx->sequence) { spin_unlock(&cil->xc_push_lock); goto out_skip; @@ -847,7 +847,7 @@ restart: goto out_abort; spin_lock(&commit_iclog->ic_callback_lock); - if (commit_iclog->ic_state & XLOG_STATE_IOERROR) { + if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { spin_unlock(&commit_iclog->ic_callback_lock); goto out_abort; } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b880c23cb6e4..b192c5a9f9fd 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -40,17 +40,15 @@ static inline uint xlog_get_client_id(__be32 i) /* * In core log state */ -#define XLOG_STATE_ACTIVE 0x0001 /* Current IC log being written to */ -#define XLOG_STATE_WANT_SYNC 0x0002 /* Want to sync this iclog; no more writes */ -#define XLOG_STATE_SYNCING 0x0004 /* This IC log is syncing */ -#define XLOG_STATE_DONE_SYNC 0x0008 /* Done syncing to disk */ -#define XLOG_STATE_DO_CALLBACK \ - 0x0010 /* Process callback functions */ -#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ -#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ -#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ -#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ -#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ +enum xlog_iclog_state { + XLOG_STATE_ACTIVE, /* Current IC log being written to */ + XLOG_STATE_WANT_SYNC, /* Want to sync this iclog; no more writes */ + XLOG_STATE_SYNCING, /* This IC log is syncing */ + XLOG_STATE_DONE_SYNC, /* Done syncing to disk */ + XLOG_STATE_CALLBACK, /* Callback functions now */ + XLOG_STATE_DIRTY, /* Dirty IC log, not ready for ACTIVE status */ + XLOG_STATE_IOERROR, /* IO error happened in sync'ing log */ +}; /* * Flags to log ticket @@ -179,8 +177,6 @@ typedef struct xlog_ticket { * - ic_next is the pointer to the next iclog in the ring. * - ic_log is a pointer back to the global log structure. * - ic_size is the full size of the log buffer, minus the cycle headers. - * - ic_io_size is the size of the currently pending log buffer write, which - * might be smaller than ic_size * - ic_offset is the current number of bytes written to in this iclog. * - ic_refcnt is bumped when someone is writing to the log. * - ic_state is the state of the iclog. @@ -205,9 +201,8 @@ typedef struct xlog_in_core { struct xlog_in_core *ic_prev; struct xlog *ic_log; u32 ic_size; - u32 ic_io_size; u32 ic_offset; - unsigned short ic_state; + enum xlog_iclog_state ic_state; char *ic_datap; /* pointer to iclog data */ /* Callback structures need their own cacheline */ @@ -399,8 +394,6 @@ struct xlog { /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG void *l_iclog_bak[XLOG_MAX_ICLOGS]; - /* log record crc error injection factor */ - uint32_t l_badcrc_factor; #endif /* log recovery lsn tracking (for buffer submission */ xfs_lsn_t l_recovery_lsn; @@ -542,7 +535,11 @@ xlog_cil_force(struct xlog *log) * by a spinlock. This matches the semantics of all the wait queues used in the * log code. */ -static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) +static inline void +xlog_wait( + struct wait_queue_head *wq, + struct spinlock *lock) + __releases(lock) { DECLARE_WAITQUEUE(wait, current); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index c1a514ffff55..99ec3fba4548 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -103,10 +103,9 @@ xlog_alloc_buffer( * Pass log block 0 since we don't have an addr yet, buffer will be * verified on read. */ - if (!xlog_verify_bno(log, 0, nbblks)) { + if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); return NULL; } @@ -152,11 +151,10 @@ xlog_do_io( { int error; - if (!xlog_verify_bno(log, blk_no, nbblks)) { + if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) { xfs_warn(log->l_mp, "Invalid log block/length (0x%llx, 0x%x) for buffer", blk_no, nbblks); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); return -EFSCORRUPTED; } @@ -244,19 +242,17 @@ xlog_header_check_recover( * (XLOG_FMT_UNKNOWN). This stops us from trying to recover * a dirty log created in IRIX. */ - if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { + if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) { xfs_warn(mp, "dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); - XFS_ERROR_REPORT("xlog_header_check_recover(1)", - XFS_ERRLEVEL_HIGH, mp); return -EFSCORRUPTED; - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { + } + if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, + &head->h_fs_uuid))) { xfs_warn(mp, "dirty log entry has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); - XFS_ERROR_REPORT("xlog_header_check_recover(2)", - XFS_ERRLEVEL_HIGH, mp); return -EFSCORRUPTED; } return 0; @@ -279,11 +275,10 @@ xlog_header_check_mount( * by IRIX and continue. */ xfs_warn(mp, "null uuid in log - IRIX style log"); - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { + } else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, + &head->h_fs_uuid))) { xfs_warn(mp, "log has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); - XFS_ERROR_REPORT("xlog_header_check_mount", - XFS_ERRLEVEL_HIGH, mp); return -EFSCORRUPTED; } return 0; @@ -471,7 +466,7 @@ xlog_find_verify_log_record( xfs_warn(log->l_mp, "Log inconsistent (didn't find previous header)"); ASSERT(0); - error = -EIO; + error = -EFSCORRUPTED; goto out; } @@ -1347,10 +1342,11 @@ xlog_find_tail( error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, &rhead_blk, &rhead, &wrapped); if (error < 0) - return error; + goto done; if (!error) { xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); - return -EIO; + error = -EFSCORRUPTED; + goto done; } *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); @@ -1699,11 +1695,10 @@ xlog_clear_stale_blocks( * the distance from the beginning of the log to the * tail. */ - if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { - XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", - XFS_ERRLEVEL_LOW, log->l_mp); + if (XFS_IS_CORRUPT(log->l_mp, + head_block < tail_block || + head_block >= log->l_logBBsize)) return -EFSCORRUPTED; - } tail_distance = tail_block + (log->l_logBBsize - head_block); } else { /* @@ -1711,11 +1706,10 @@ xlog_clear_stale_blocks( * so the distance from the head to the tail is just * the tail block minus the head block. */ - if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ - XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", - XFS_ERRLEVEL_LOW, log->l_mp); + if (XFS_IS_CORRUPT(log->l_mp, + head_block >= tail_block || + head_cycle != tail_cycle + 1)) return -EFSCORRUPTED; - } tail_distance = tail_block - head_block; } @@ -2135,13 +2129,11 @@ xlog_recover_do_inode_buffer( */ logged_nextp = item->ri_buf[item_index].i_addr + next_unlinked_offset - reg_buf_offset; - if (unlikely(*logged_nextp == 0)) { + if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) { xfs_alert(mp, "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " "Trying to replay bad (0) inode di_next_unlinked field.", item, bp); - XFS_ERROR_REPORT("xlog_recover_do_inode_buf", - XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } @@ -2576,6 +2568,7 @@ xlog_recover_do_reg_buffer( int bit; int nbits; xfs_failaddr_t fa; + const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot); trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); @@ -2618,7 +2611,7 @@ xlog_recover_do_reg_buffer( "XFS: NULL dquot in %s.", __func__); goto next; } - if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { + if (item->ri_buf[i].i_len < size_disk_dquot) { xfs_alert(mp, "XFS: dquot too small (%d) in %s.", item->ri_buf[i].i_len, __func__); @@ -2969,22 +2962,18 @@ xlog_recover_inode_pass2( * Make sure the place we're flushing out to really looks * like an inode! */ - if (unlikely(!xfs_verify_magic16(bp, dip->di_magic))) { + if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) { xfs_alert(mp, "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", __func__, dip, bp, in_f->ilf_ino); - XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", - XFS_ERRLEVEL_LOW, mp); error = -EFSCORRUPTED; goto out_release; } ldip = item->ri_buf[1].i_addr; - if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { + if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) { xfs_alert(mp, "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", __func__, item, in_f->ilf_ino); - XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", - XFS_ERRLEVEL_LOW, mp); error = -EFSCORRUPTED; goto out_release; } @@ -3166,7 +3155,7 @@ xlog_recover_inode_pass2( default: xfs_warn(log->l_mp, "%s: Invalid flag", __func__); ASSERT(0); - error = -EIO; + error = -EFSCORRUPTED; goto out_release; } } @@ -3247,12 +3236,12 @@ xlog_recover_dquot_pass2( recddq = item->ri_buf[1].i_addr; if (recddq == NULL) { xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); - return -EIO; + return -EFSCORRUPTED; } - if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { + if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) { xfs_alert(log->l_mp, "dquot too small (%d) in %s.", item->ri_buf[1].i_len, __func__); - return -EIO; + return -EFSCORRUPTED; } /* @@ -3279,7 +3268,7 @@ xlog_recover_dquot_pass2( if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", dq_f->qlf_id, fa); - return -EIO; + return -EFSCORRUPTED; } ASSERT(dq_f->qlf_len == 1); @@ -3537,6 +3526,7 @@ xfs_cui_copy_format( memcpy(dst_cui_fmt, src_cui_fmt, len); return 0; } + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; } @@ -3601,8 +3591,10 @@ xlog_recover_cud_pass2( struct xfs_ail *ailp = log->l_ailp; cud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) + if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; + } cui_id = cud_formatp->cud_cui_id; /* @@ -3654,6 +3646,7 @@ xfs_bui_copy_format( memcpy(dst_bui_fmt, src_bui_fmt, len); return 0; } + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; } @@ -3677,8 +3670,10 @@ xlog_recover_bui_pass2( bui_formatp = item->ri_buf[0].i_addr; - if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; + } buip = xfs_bui_init(mp); error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); if (error) { @@ -3720,8 +3715,10 @@ xlog_recover_bud_pass2( struct xfs_ail *ailp = log->l_ailp; bud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) + if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; + } bui_id = bud_formatp->bud_bui_id; /* @@ -4018,7 +4015,7 @@ xlog_recover_commit_pass1( xfs_warn(log->l_mp, "%s: invalid item type (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); - return -EIO; + return -EFSCORRUPTED; } } @@ -4066,7 +4063,7 @@ xlog_recover_commit_pass2( xfs_warn(log->l_mp, "%s: invalid item type (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); - return -EIO; + return -EFSCORRUPTED; } } @@ -4187,7 +4184,7 @@ xlog_recover_add_to_cont_trans( ASSERT(len <= sizeof(struct xfs_trans_header)); if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); - return -EIO; + return -EFSCORRUPTED; } xlog_recover_add_item(&trans->r_itemq); @@ -4243,13 +4240,13 @@ xlog_recover_add_to_trans( xfs_warn(log->l_mp, "%s: bad header magic number", __func__); ASSERT(0); - return -EIO; + return -EFSCORRUPTED; } if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); ASSERT(0); - return -EIO; + return -EFSCORRUPTED; } /* @@ -4285,7 +4282,7 @@ xlog_recover_add_to_trans( in_f->ilf_size); ASSERT(0); kmem_free(ptr); - return -EIO; + return -EFSCORRUPTED; } item->ri_total = in_f->ilf_size; @@ -4293,7 +4290,16 @@ xlog_recover_add_to_trans( kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), 0); } - ASSERT(item->ri_total > item->ri_cnt); + + if (item->ri_total <= item->ri_cnt) { + xfs_warn(log->l_mp, + "log item region count (%d) overflowed size (%d)", + item->ri_cnt, item->ri_total); + ASSERT(0); + kmem_free(ptr); + return -EFSCORRUPTED; + } + /* Description region is ri_buf[0] */ item->ri_buf[item->ri_cnt].i_addr = ptr; item->ri_buf[item->ri_cnt].i_len = len; @@ -4380,7 +4386,7 @@ xlog_recovery_process_trans( default: xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); ASSERT(0); - error = -EIO; + error = -EFSCORRUPTED; break; } if (error || freeit) @@ -4460,7 +4466,7 @@ xlog_recover_process_ophdr( xfs_warn(log->l_mp, "%s: bad clientid 0x%x", __func__, ohead->oh_clientid); ASSERT(0); - return -EIO; + return -EFSCORRUPTED; } /* @@ -4470,7 +4476,7 @@ xlog_recover_process_ophdr( if (dp + len > end) { xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); WARN_ON(1); - return -EIO; + return -EFSCORRUPTED; } trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); @@ -5172,8 +5178,10 @@ xlog_recover_process( * If the filesystem is CRC enabled, this mismatch becomes a * fatal log corruption failure. */ - if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) + if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; + } } xlog_unpack_data(rhead, dp, log); @@ -5190,31 +5198,25 @@ xlog_valid_rec_header( { int hlen; - if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { - XFS_ERROR_REPORT("xlog_valid_rec_header(1)", - XFS_ERRLEVEL_LOW, log->l_mp); + if (XFS_IS_CORRUPT(log->l_mp, + rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) return -EFSCORRUPTED; - } - if (unlikely( - (!rhead->h_version || - (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { + if (XFS_IS_CORRUPT(log->l_mp, + (!rhead->h_version || + (be32_to_cpu(rhead->h_version) & + (~XLOG_VERSION_OKBITS))))) { xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", __func__, be32_to_cpu(rhead->h_version)); - return -EIO; + return -EFSCORRUPTED; } /* LR body must have data or it wouldn't have been written */ hlen = be32_to_cpu(rhead->h_len); - if (unlikely( hlen <= 0 || hlen > INT_MAX )) { - XFS_ERROR_REPORT("xlog_valid_rec_header(2)", - XFS_ERRLEVEL_LOW, log->l_mp); + if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > INT_MAX)) return -EFSCORRUPTED; - } - if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { - XFS_ERROR_REPORT("xlog_valid_rec_header(3)", - XFS_ERRLEVEL_LOW, log->l_mp); + if (XFS_IS_CORRUPT(log->l_mp, + blkno > log->l_logBBsize || blkno > INT_MAX)) return -EFSCORRUPTED; - } return 0; } @@ -5296,8 +5298,12 @@ xlog_do_recovery_pass( "invalid iclog size (%d bytes), using lsunit (%d bytes)", h_size, log->l_mp->m_logbsize); h_size = log->l_mp->m_logbsize; - } else - return -EFSCORRUPTED; + } else { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, + log->l_mp); + error = -EFSCORRUPTED; + goto bread_err1; + } } if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 9804efe525a9..e0f9d3b6abe9 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -20,8 +20,8 @@ __xfs_printk( const struct xfs_mount *mp, struct va_format *vaf) { - if (mp && mp->m_fsname) { - printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); + if (mp && mp->m_super) { + printk("%sXFS (%s): %pV\n", level, mp->m_super->s_id, vaf); return; } printk("%sXFS: %pV\n", level, vaf); @@ -86,17 +86,25 @@ xfs_alert_tag( } void -asswarn(char *expr, char *file, int line) +asswarn( + struct xfs_mount *mp, + char *expr, + char *file, + int line) { - xfs_warn(NULL, "Assertion failed: %s, file: %s, line: %d", + xfs_warn(mp, "Assertion failed: %s, file: %s, line: %d", expr, file, line); WARN_ON(1); } void -assfail(char *expr, char *file, int line) +assfail( + struct xfs_mount *mp, + char *expr, + char *file, + int line) { - xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", + xfs_emerg(mp, "Assertion failed: %s, file: %s, line: %d", expr, file, line); if (xfs_globals.bug_on_assert) BUG(); @@ -105,7 +113,7 @@ assfail(char *expr, char *file, int line) } void -xfs_hex_dump(void *p, int length) +xfs_hex_dump(const void *p, int length) { print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); } diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 34447dca97d1..0b05e10995a0 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -57,9 +57,9 @@ do { \ #define xfs_debug_ratelimited(dev, fmt, ...) \ xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__) -extern void assfail(char *expr, char *f, int l); -extern void asswarn(char *expr, char *f, int l); +void assfail(struct xfs_mount *mp, char *expr, char *f, int l); +void asswarn(struct xfs_mount *mp, char *expr, char *f, int l); -extern void xfs_hex_dump(void *p, int length); +extern void xfs_hex_dump(const void *p, int length); #endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ba5b6f3b2b88..fca65109cf24 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -426,45 +426,6 @@ xfs_update_alignment(xfs_mount_t *mp) } /* - * Set the default minimum read and write sizes unless - * already specified in a mount option. - * We use smaller I/O sizes when the file system - * is being used for NFS service (wsync mount option). - */ -STATIC void -xfs_set_rw_sizes(xfs_mount_t *mp) -{ - xfs_sb_t *sbp = &(mp->m_sb); - int readio_log, writeio_log; - - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { - if (mp->m_flags & XFS_MOUNT_WSYNC) { - readio_log = XFS_WSYNC_READIO_LOG; - writeio_log = XFS_WSYNC_WRITEIO_LOG; - } else { - readio_log = XFS_READIO_LOG_LARGE; - writeio_log = XFS_WRITEIO_LOG_LARGE; - } - } else { - readio_log = mp->m_readio_log; - writeio_log = mp->m_writeio_log; - } - - if (sbp->sb_blocklog > readio_log) { - mp->m_readio_log = sbp->sb_blocklog; - } else { - mp->m_readio_log = readio_log; - } - mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog); - if (sbp->sb_blocklog > writeio_log) { - mp->m_writeio_log = sbp->sb_blocklog; - } else { - mp->m_writeio_log = writeio_log; - } - mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); -} - -/* * precalculate the low space thresholds for dynamic speculative preallocation. */ void @@ -706,7 +667,8 @@ xfs_mountfs( /* enable fail_at_unmount as default */ mp->m_fail_unmount = true; - error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); + error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, + NULL, mp->m_super->s_id); if (error) goto out; @@ -728,9 +690,12 @@ xfs_mountfs( goto out_remove_errortag; /* - * Set the minimum read and write sizes + * Update the preferred write size based on the information from the + * on-disk superblock. */ - xfs_set_rw_sizes(mp); + mp->m_allocsize_log = + max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log); + mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog); /* set the low space thresholds for dynamic preallocation */ xfs_set_low_space_thresholds(mp); @@ -796,9 +761,8 @@ xfs_mountfs( goto out_free_dir; } - if (!sbp->sb_logblocks) { + if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) { xfs_warn(mp, "no log defined"); - XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); error = -EFSCORRUPTED; goto out_free_perag; } @@ -836,12 +800,10 @@ xfs_mountfs( ASSERT(rip != NULL); - if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) { + if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) { xfs_warn(mp, "corrupted root inode %llu: not a directory", (unsigned long long)rip->i_ino); xfs_iunlock(rip, XFS_ILOCK_EXCL); - XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, - mp); error = -EFSCORRUPTED; goto out_rele_rip; } @@ -1277,7 +1239,7 @@ xfs_mod_fdblocks( printk_once(KERN_WARNING "Filesystem \"%s\": reserve blocks depleted! " "Consider increasing reserve pool size.", - mp->m_fsname); + mp->m_super->s_id); fdblocks_enospc: spin_unlock(&mp->m_sb_lock); return -ENOSPC; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index fdb60e09a9c5..88ab09ed29e7 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -9,10 +9,8 @@ struct xlog; struct xfs_inode; struct xfs_mru_cache; -struct xfs_nameops; struct xfs_ail; struct xfs_quotainfo; -struct xfs_dir_ops; struct xfs_da_geometry; /* dynamic preallocation free space thresholds, 5% down to 1% */ @@ -59,7 +57,6 @@ struct xfs_error_cfg { typedef struct xfs_mount { struct super_block *m_super; - xfs_tid_t m_tid; /* next unused tid for fs */ /* * Bitsets of per-fs metadata that have been checked and/or are sick. @@ -89,8 +86,6 @@ typedef struct xfs_mount { struct percpu_counter m_delalloc_blks; struct xfs_buf *m_sb_bp; /* buffer for superblock */ - char *m_fsname; /* filesystem name */ - int m_fsname_len; /* strlen of fs name */ char *m_rtname; /* realtime device name */ char *m_logname; /* external log device name */ int m_bsize; /* fs logical block size */ @@ -98,10 +93,8 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - uint m_readio_log; /* min read size log bytes */ - uint m_readio_blocks; /* min read size blocks */ - uint m_writeio_log; /* min write size log bytes */ - uint m_writeio_blocks; /* min write size blocks */ + uint m_allocsize_log;/* min write size log bytes */ + uint m_allocsize_blocks; /* min write size blocks */ struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ @@ -159,10 +152,6 @@ typedef struct xfs_mount { int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ - const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ - const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ - const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ - uint m_chsize; /* size of next field */ atomic_t m_active_trans; /* number trans frozen */ struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct delayed_work m_reclaim_work; /* background inode reclaim */ @@ -229,7 +218,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ #define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ #define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ -#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ +#define XFS_MOUNT_ALLOCSIZE (1ULL << 12) /* specified allocation size */ #define XFS_MOUNT_SMALL_INUMS (1ULL << 14) /* user wants 32bit inodes */ #define XFS_MOUNT_32BITINODES (1ULL << 15) /* inode32 allocator active */ #define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */ @@ -238,7 +227,7 @@ typedef struct xfs_mount { * allocation */ #define XFS_MOUNT_RDONLY (1ULL << 20) /* read-only fs */ #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ -#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred +#define XFS_MOUNT_LARGEIO (1ULL << 22) /* report large preferred * I/O size in stat() */ #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams allocator */ @@ -246,13 +235,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */ - -/* - * Default minimum read and write sizes. - */ -#define XFS_READIO_LOG_LARGE 16 -#define XFS_WRITEIO_LOG_LARGE 16 - /* * Max and min values for mount-option defined I/O * preallocation sizes. @@ -260,37 +242,6 @@ typedef struct xfs_mount { #define XFS_MAX_IO_LOG 30 /* 1G */ #define XFS_MIN_IO_LOG PAGE_SHIFT -/* - * Synchronous read and write sizes. This should be - * better for NFSv2 wsync filesystems. - */ -#define XFS_WSYNC_READIO_LOG 15 /* 32k */ -#define XFS_WSYNC_WRITEIO_LOG 14 /* 16k */ - -/* - * Allow large block sizes to be reported to userspace programs if the - * "largeio" mount option is used. - * - * If compatibility mode is specified, simply return the basic unit of caching - * so that we don't get inefficient read/modify/write I/O from user apps. - * Otherwise.... - * - * If the underlying volume is a stripe, then return the stripe width in bytes - * as the recommended I/O size. It is not a stripe and we've set a default - * buffered I/O size, return that, otherwise return the compat default. - */ -static inline unsigned long -xfs_preferred_iosize(xfs_mount_t *mp) -{ - if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE) - return PAGE_SIZE; - return (mp->m_swidth ? - (mp->m_swidth << mp->m_sb.sb_blocklog) : - ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ? - (1 << (int)max(mp->m_readio_log, mp->m_writeio_log)) : - PAGE_SIZE)); -} - #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) #define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 9c96493be9e0..bb3008d390aa 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -12,6 +12,7 @@ #include "xfs_trans.h" #include "xfs_bmap.h" #include "xfs_iomap.h" +#include "xfs_pnfs.h" /* * Ensure that we do not have any outstanding pNFS layouts that can be used by @@ -59,7 +60,7 @@ xfs_fs_get_uuid( printk_once(KERN_NOTICE "XFS (%s): using experimental pNFS feature, use at your own risk!\n", - mp->m_fsname); + mp->m_super->s_id); if (*len < sizeof(uuid_t)) return -EINVAL; @@ -142,39 +143,34 @@ xfs_fs_map_blocks( lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); - xfs_iunlock(ip, lock_flags); - if (error) - goto out_unlock; + ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); + + if (!error && write && + (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) { + if (offset + length > XFS_ISIZE(ip)) + end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb); + else if (nimaps && imap.br_startblock == HOLESTARTBLOCK) + end_fsb = min(end_fsb, imap.br_startoff + + imap.br_blockcount); + xfs_iunlock(ip, lock_flags); + + error = xfs_iomap_write_direct(ip, offset_fsb, + end_fsb - offset_fsb, &imap); + if (error) + goto out_unlock; - if (write) { - enum xfs_prealloc_flags flags = 0; - - ASSERT(imap.br_startblock != DELAYSTARTBLOCK); - - if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { - /* - * xfs_iomap_write_direct() expects to take ownership of - * the shared ilock. - */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_iomap_write_direct(ip, offset, length, - &imap, nimaps); - if (error) - goto out_unlock; - - /* - * Ensure the next transaction is committed - * synchronously so that the blocks allocated and - * handed out to the client are guaranteed to be - * present even after a server crash. - */ - flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC; - } - - error = xfs_update_prealloc_flags(ip, flags); + /* + * Ensure the next transaction is committed synchronously so + * that the blocks allocated and handed out to the client are + * guaranteed to be present even after a server crash. + */ + error = xfs_update_prealloc_flags(ip, + XFS_PREALLOC_SET | XFS_PREALLOC_SYNC); if (error) goto out_unlock; + } else { + xfs_iunlock(ip, lock_flags); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ecd8ce152ab1..0b0909657bad 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -22,6 +22,7 @@ #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_error.h" /* * The global quota manager. There is only one of these for the entire @@ -29,10 +30,10 @@ * quota functionality, including maintaining the freelist and hash * tables of dquots. */ -STATIC int xfs_qm_init_quotainos(xfs_mount_t *); -STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); +STATIC int xfs_qm_init_quotainos(struct xfs_mount *mp); +STATIC int xfs_qm_init_quotainfo(struct xfs_mount *mp); -STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi); +STATIC void xfs_qm_destroy_quotainos(struct xfs_quotainfo *qi); STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it @@ -243,14 +244,14 @@ xfs_qm_unmount_quotas( STATIC int xfs_qm_dqattach_one( - xfs_inode_t *ip, - xfs_dqid_t id, - uint type, - bool doalloc, - xfs_dquot_t **IO_idqpp) + struct xfs_inode *ip, + xfs_dqid_t id, + uint type, + bool doalloc, + struct xfs_dquot **IO_idqpp) { - xfs_dquot_t *dqp; - int error; + struct xfs_dquot *dqp; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); error = 0; @@ -341,7 +342,7 @@ xfs_qm_dqattach_locked( } if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) { - error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, + error = xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, doalloc, &ip->i_pdquot); if (error) goto done; @@ -539,12 +540,12 @@ xfs_qm_shrink_count( STATIC void xfs_qm_set_defquota( - xfs_mount_t *mp, - uint type, - xfs_quotainfo_t *qinf) + struct xfs_mount *mp, + uint type, + struct xfs_quotainfo *qinf) { - xfs_dquot_t *dqp; - struct xfs_def_quota *defq; + struct xfs_dquot *dqp; + struct xfs_def_quota *defq; struct xfs_disk_dquot *ddqp; int error; @@ -642,7 +643,7 @@ xfs_qm_init_quotainfo( ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), 0); + qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0); error = list_lru_init(&qinf->qi_lru); if (error) @@ -709,9 +710,9 @@ out_free_qinf: */ void xfs_qm_destroy_quotainfo( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_quotainfo_t *qi; + struct xfs_quotainfo *qi; qi = mp->m_quotainfo; ASSERT(qi != NULL); @@ -754,11 +755,15 @@ xfs_qm_qino_alloc( if ((flags & XFS_QMOPT_PQUOTA) && (mp->m_sb.sb_gquotino != NULLFSINO)) { ino = mp->m_sb.sb_gquotino; - ASSERT(mp->m_sb.sb_pquotino == NULLFSINO); + if (XFS_IS_CORRUPT(mp, + mp->m_sb.sb_pquotino != NULLFSINO)) + return -EFSCORRUPTED; } else if ((flags & XFS_QMOPT_GQUOTA) && (mp->m_sb.sb_pquotino != NULLFSINO)) { ino = mp->m_sb.sb_pquotino; - ASSERT(mp->m_sb.sb_gquotino == NULLFSINO); + if (XFS_IS_CORRUPT(mp, + mp->m_sb.sb_gquotino != NULLFSINO)) + return -EFSCORRUPTED; } if (ino != NULLFSINO) { error = xfs_iget(mp, NULL, ino, 0, 0, ip); @@ -1559,7 +1564,7 @@ error_rele: STATIC void xfs_qm_destroy_quotainos( - xfs_quotainfo_t *qi) + struct xfs_quotainfo *qi) { if (qi->qi_uquotaip) { xfs_irele(qi->qi_uquotaip); @@ -1693,7 +1698,7 @@ xfs_qm_vop_dqalloc( } } if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { - if (xfs_get_projid(ip) != prid) { + if (ip->i_d.di_projid != prid) { xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, (xfs_dqid_t)prid, XFS_DQ_PROJ, true, &pq); @@ -1737,14 +1742,14 @@ error_rele: * Actually transfer ownership, and do dquot modifications. * These were already reserved. */ -xfs_dquot_t * +struct xfs_dquot * xfs_qm_vop_chown( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t **IO_olddq, - xfs_dquot_t *newdq) + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot **IO_olddq, + struct xfs_dquot *newdq) { - xfs_dquot_t *prevdq; + struct xfs_dquot *prevdq; uint bfield = XFS_IS_REALTIME_INODE(ip) ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; @@ -1827,7 +1832,7 @@ xfs_qm_vop_chown_reserve( } if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp && - xfs_get_projid(ip) != be32_to_cpu(pdqp->q_core.d_id)) { + ip->i_d.di_projid != be32_to_cpu(pdqp->q_core.d_id)) { prjflags = XFS_QMOPT_ENOSPC; pdq_delblks = pdqp; if (delblks) { @@ -1928,7 +1933,7 @@ xfs_qm_vop_create_dqattach( } if (pdqp && XFS_IS_PQUOTA_ON(mp)) { ASSERT(ip->i_pdquot == NULL); - ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); + ASSERT(ip->i_d.di_projid == be32_to_cpu(pdqp->q_core.d_id)); ip->i_pdquot = xfs_qm_dqhold(pdqp); xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index b41b75089548..7823af39008b 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -54,7 +54,7 @@ struct xfs_def_quota { * Various quota information for individual filesystems. * The mount structure keeps a pointer to this. */ -typedef struct xfs_quotainfo { +struct xfs_quotainfo { struct radix_tree_root qi_uquota_tree; struct radix_tree_root qi_gquota_tree; struct radix_tree_root qi_pquota_tree; @@ -76,8 +76,8 @@ typedef struct xfs_quotainfo { struct xfs_def_quota qi_usr_default; struct xfs_def_quota qi_grp_default; struct xfs_def_quota qi_prj_default; - struct shrinker qi_shrinker; -} xfs_quotainfo_t; + struct shrinker qi_shrinker; +}; static inline struct radix_tree_root * xfs_dquot_tree( diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 5d72e88598b4..fc2fa418919f 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -54,13 +54,13 @@ xfs_fill_statvfs_from_dquot( */ void xfs_qm_statvfs( - xfs_inode_t *ip, + struct xfs_inode *ip, struct kstatfs *statp) { - xfs_mount_t *mp = ip->i_mount; - xfs_dquot_t *dqp; + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *dqp; - if (!xfs_qm_dqget(mp, xfs_get_projid(ip), XFS_DQ_PROJ, false, &dqp)) { + if (!xfs_qm_dqget(mp, ip->i_d.di_projid, XFS_DQ_PROJ, false, &dqp)) { xfs_fill_statvfs_from_dquot(statp, dqp); xfs_qm_dqput(dqp); } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index da7ad0383037..1ea82764bf89 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -19,9 +19,72 @@ #include "xfs_qm.h" #include "xfs_icache.h" -STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); -STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, - uint); +STATIC int +xfs_qm_log_quotaoff( + struct xfs_mount *mp, + struct xfs_qoff_logitem **qoffstartp, + uint flags) +{ + struct xfs_trans *tp; + int error; + struct xfs_qoff_logitem *qoffi; + + *qoffstartp = NULL; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); + if (error) + goto out; + + qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + xfs_log_sb(tp); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp); + if (error) + goto out; + + *qoffstartp = qoffi; +out: + return error; +} + +STATIC int +xfs_qm_log_quotaoff_end( + struct xfs_mount *mp, + struct xfs_qoff_logitem *startqoff, + uint flags) +{ + struct xfs_trans *tp; + int error; + struct xfs_qoff_logitem *qoffi; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); + if (error) + return error; + + qoffi = xfs_trans_get_qoff_item(tp, startqoff, + flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp); +} /* * Turn off quota accounting and/or enforcement for all udquots and/or @@ -40,7 +103,7 @@ xfs_qm_scall_quotaoff( uint dqtype; int error; uint inactivate_flags; - xfs_qoff_logitem_t *qoffstart; + struct xfs_qoff_logitem *qoffstart; /* * No file system can have quotas enabled on disk but not in core. @@ -538,74 +601,6 @@ out_unlock: return error; } -STATIC int -xfs_qm_log_quotaoff_end( - xfs_mount_t *mp, - xfs_qoff_logitem_t *startqoff, - uint flags) -{ - xfs_trans_t *tp; - int error; - xfs_qoff_logitem_t *qoffi; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); - if (error) - return error; - - qoffi = xfs_trans_get_qoff_item(tp, startqoff, - flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp); -} - - -STATIC int -xfs_qm_log_quotaoff( - xfs_mount_t *mp, - xfs_qoff_logitem_t **qoffstartp, - uint flags) -{ - xfs_trans_t *tp; - int error; - xfs_qoff_logitem_t *qoffi; - - *qoffstartp = NULL; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); - if (error) - goto out; - - qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - xfs_log_sb(tp); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp); - if (error) - goto out; - - *qoffstartp = qoffi; -out: - return error; -} - /* Fill out the quota context. */ static void xfs_qm_scall_getquota_fill_qc( diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index cd6c7210a373..c7de17deeae6 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -201,6 +201,9 @@ xfs_fs_rm_xquota( if (XFS_IS_QUOTA_ON(mp)) return -EINVAL; + if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA)) + return -EINVAL; + if (uflags & FS_USER_QUOTA) flags |= XFS_DQ_USER; if (uflags & FS_GROUP_QUOTA) diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 2328268e6245..8eeed73928cd 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -17,7 +17,7 @@ #include "xfs_refcount_item.h" #include "xfs_log.h" #include "xfs_refcount.h" - +#include "xfs_error.h" kmem_zone_t *xfs_cui_zone; kmem_zone_t *xfs_cud_zone; @@ -34,7 +34,7 @@ xfs_cui_item_free( if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) kmem_free(cuip); else - kmem_zone_free(xfs_cui_zone, cuip); + kmem_cache_free(xfs_cui_zone, cuip); } /* @@ -206,7 +206,7 @@ xfs_cud_item_release( struct xfs_cud_log_item *cudp = CUD_ITEM(lip); xfs_cui_release(cudp->cud_cuip); - kmem_zone_free(xfs_cud_zone, cudp); + kmem_cache_free(xfs_cud_zone, cudp); } static const struct xfs_item_ops xfs_cud_item_ops = { @@ -497,7 +497,7 @@ xfs_cui_recover( */ set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); xfs_cui_release(cuip); - return -EIO; + return -EFSCORRUPTED; } } @@ -536,6 +536,7 @@ xfs_cui_recover( type = refc_type; break; default: + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); error = -EFSCORRUPTED; goto abort_error; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a9634110c783..de451235c4ee 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -308,13 +308,13 @@ static int xfs_find_trim_cow_extent( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, + struct xfs_bmbt_irec *cmap, bool *shared, bool *found) { xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; struct xfs_iext_cursor icur; - struct xfs_bmbt_irec got; *found = false; @@ -322,23 +322,22 @@ xfs_find_trim_cow_extent( * If we don't find an overlapping extent, trim the range we need to * allocate to fit the hole we found. */ - if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got)) - got.br_startoff = offset_fsb + count_fsb; - if (got.br_startoff > offset_fsb) { + if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, cmap)) + cmap->br_startoff = offset_fsb + count_fsb; + if (cmap->br_startoff > offset_fsb) { xfs_trim_extent(imap, imap->br_startoff, - got.br_startoff - imap->br_startoff); + cmap->br_startoff - imap->br_startoff); return xfs_inode_need_cow(ip, imap, shared); } *shared = true; - if (isnullstartblock(got.br_startblock)) { - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + if (isnullstartblock(cmap->br_startblock)) { + xfs_trim_extent(imap, cmap->br_startoff, cmap->br_blockcount); return 0; } /* real extent found - no need to allocate */ - xfs_trim_extent(&got, offset_fsb, count_fsb); - *imap = got; + xfs_trim_extent(cmap, offset_fsb, count_fsb); *found = true; return 0; } @@ -348,6 +347,7 @@ int xfs_reflink_allocate_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, + struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now) @@ -367,7 +367,7 @@ xfs_reflink_allocate_cow( xfs_ifork_init_cow(ip); } - error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) return error; if (found) @@ -392,7 +392,7 @@ xfs_reflink_allocate_cow( /* * Check for an overlapping extent again now that we dropped the ilock. */ - error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) goto out_trans_cancel; if (found) { @@ -410,8 +410,8 @@ xfs_reflink_allocate_cow( /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, - resblks, imap, &nimaps); + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, + &nimaps); if (error) goto out_unreserve; @@ -427,15 +427,15 @@ xfs_reflink_allocate_cow( if (nimaps == 0) return -ENOSPC; convert: - xfs_trim_extent(imap, offset_fsb, count_fsb); + xfs_trim_extent(cmap, offset_fsb, count_fsb); /* * COW fork extents are supposed to remain unwritten until we're ready * to initiate a disk write. For direct I/O we are going to write the * data and need the conversion, but for buffered writes we're done. */ - if (!convert_now || imap->br_state == XFS_EXT_NORM) + if (!convert_now || cmap->br_state == XFS_EXT_NORM) return 0; - trace_xfs_reflink_convert_cow(ip, imap); + trace_xfs_reflink_convert_cow(ip, cmap); return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); out_unreserve: @@ -1270,7 +1270,7 @@ xfs_reflink_zero_posteof( trace_xfs_zero_eof(ip, isize, pos - isize); return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, - &xfs_iomap_ops); + &xfs_buffered_write_iomap_ops); } /* @@ -1381,85 +1381,6 @@ out_unlock: return ret; } -/* - * The user wants to preemptively CoW all shared blocks in this file, - * which enables us to turn off the reflink flag. Iterate all - * extents which are not prealloc/delalloc to see which ranges are - * mentioned in the refcount tree, then read those blocks into the - * pagecache, dirty them, fsync them back out, and then we can update - * the inode flag. What happens if we run out of memory? :) - */ -STATIC int -xfs_reflink_dirty_extents( - struct xfs_inode *ip, - xfs_fileoff_t fbno, - xfs_filblks_t end, - xfs_off_t isize) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_extlen_t aglen; - xfs_agblock_t rbno; - xfs_extlen_t rlen; - xfs_off_t fpos; - xfs_off_t flen; - struct xfs_bmbt_irec map[2]; - int nmaps; - int error = 0; - - while (end - fbno > 0) { - nmaps = 1; - /* - * Look for extents in the file. Skip holes, delalloc, or - * unwritten extents; they can't be reflinked. - */ - error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0); - if (error) - goto out; - if (nmaps == 0) - break; - if (!xfs_bmap_is_real_extent(&map[0])) - goto next; - - map[1] = map[0]; - while (map[1].br_blockcount) { - agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock); - aglen = map[1].br_blockcount; - - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, - aglen, &rbno, &rlen, true); - if (error) - goto out; - if (rbno == NULLAGBLOCK) - break; - - /* Dirty the pages */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - fpos = XFS_FSB_TO_B(mp, map[1].br_startoff + - (rbno - agbno)); - flen = XFS_FSB_TO_B(mp, rlen); - if (fpos + flen > isize) - flen = isize - fpos; - error = iomap_file_unshare(VFS_I(ip), fpos, flen, - &xfs_iomap_ops); - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (error) - goto out; - - map[1].br_blockcount -= (rbno - agbno + rlen); - map[1].br_startoff += (rbno - agbno + rlen); - map[1].br_startblock += (rbno - agbno + rlen); - } - -next: - fbno = map[0].br_startoff + map[0].br_blockcount; - } -out: - return error; -} - /* Does this inode need the reflink flag? */ int xfs_reflink_inode_has_shared_extents( @@ -1596,10 +1517,7 @@ xfs_reflink_unshare( xfs_off_t offset, xfs_off_t len) { - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t fbno; - xfs_filblks_t end; - xfs_off_t isize; + struct inode *inode = VFS_I(ip); int error; if (!xfs_is_reflink_inode(ip)) @@ -1607,20 +1525,13 @@ xfs_reflink_unshare( trace_xfs_reflink_unshare(ip, offset, len); - inode_dio_wait(VFS_I(ip)); + inode_dio_wait(inode); - /* Try to CoW the selected ranges */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - fbno = XFS_B_TO_FSBT(mp, offset); - isize = i_size_read(VFS_I(ip)); - end = XFS_B_TO_FSB(mp, offset + len); - error = xfs_reflink_dirty_extents(ip, fbno, end, isize); + error = iomap_file_unshare(inode, offset, len, + &xfs_buffered_write_iomap_ops); if (error) - goto out_unlock; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* Wait for the IO to finish */ - error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + goto out; + error = filemap_write_and_wait(inode->i_mapping); if (error) goto out; @@ -1628,11 +1539,8 @@ xfs_reflink_unshare( error = xfs_reflink_try_clear_inode_flag(ip); if (error) goto out; - return 0; -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 28a43b7f581d..d18ad7f4fb64 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -25,8 +25,8 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, bool xfs_inode_need_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared); -extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, - struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode, +int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, + struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now); extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 8939e0ea09cd..4911b68f95dd 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -17,7 +17,7 @@ #include "xfs_rmap_item.h" #include "xfs_log.h" #include "xfs_rmap.h" - +#include "xfs_error.h" kmem_zone_t *xfs_rui_zone; kmem_zone_t *xfs_rud_zone; @@ -34,7 +34,7 @@ xfs_rui_item_free( if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) kmem_free(ruip); else - kmem_zone_free(xfs_rui_zone, ruip); + kmem_cache_free(xfs_rui_zone, ruip); } /* @@ -171,8 +171,10 @@ xfs_rui_copy_format( src_rui_fmt = buf->i_addr; len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); - if (buf->i_len != len) + if (buf->i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; + } memcpy(dst_rui_fmt, src_rui_fmt, len); return 0; @@ -227,7 +229,7 @@ xfs_rud_item_release( struct xfs_rud_log_item *rudp = RUD_ITEM(lip); xfs_rui_release(rudp->rud_ruip); - kmem_zone_free(xfs_rud_zone, rudp); + kmem_cache_free(xfs_rud_zone, rudp); } static const struct xfs_item_ops xfs_rud_item_ops = { @@ -539,7 +541,7 @@ xfs_rui_recover( */ set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags); xfs_rui_release(ruip); - return -EIO; + return -EFSCORRUPTED; } } @@ -581,6 +583,7 @@ xfs_rui_recover( type = XFS_RMAP_FREE; break; default: + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); error = -EFSCORRUPTED; goto abort_error; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 4a48a8c75b4f..d42b5a2047e0 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -792,8 +792,7 @@ xfs_growfs_rt_alloc( */ nmap = 1; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, resblks, &map, - &nmap); + XFS_BMAPI_METADATA, 0, &map, &nmap); if (!error && nmap < 1) error = -ENOSPC; if (error) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0a8cf6b87a21..d9ae27ddf253 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -37,7 +37,8 @@ #include "xfs_reflink.h" #include <linux/magic.h> -#include <linux/parser.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> static const struct super_operations xfs_super_operations; @@ -50,7 +51,7 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ * Table driven mount option parser. */ enum { - Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize, + Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep, @@ -58,382 +59,67 @@ enum { Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, - Opt_discard, Opt_nodiscard, Opt_dax, Opt_err, + Opt_discard, Opt_nodiscard, Opt_dax, }; -static const match_table_t tokens = { - {Opt_logbufs, "logbufs=%u"}, /* number of XFS log buffers */ - {Opt_logbsize, "logbsize=%s"}, /* size of XFS log buffers */ - {Opt_logdev, "logdev=%s"}, /* log device */ - {Opt_rtdev, "rtdev=%s"}, /* realtime I/O device */ - {Opt_biosize, "biosize=%u"}, /* log2 of preferred buffered io size */ - {Opt_wsync, "wsync"}, /* safe-mode nfs compatible mount */ - {Opt_noalign, "noalign"}, /* turn off stripe alignment */ - {Opt_swalloc, "swalloc"}, /* turn on stripe width allocation */ - {Opt_sunit, "sunit=%u"}, /* data volume stripe unit */ - {Opt_swidth, "swidth=%u"}, /* data volume stripe width */ - {Opt_nouuid, "nouuid"}, /* ignore filesystem UUID */ - {Opt_grpid, "grpid"}, /* group-ID from parent directory */ - {Opt_nogrpid, "nogrpid"}, /* group-ID from current process */ - {Opt_bsdgroups, "bsdgroups"}, /* group-ID from parent directory */ - {Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */ - {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */ - {Opt_norecovery,"norecovery"}, /* don't run XFS recovery */ - {Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */ - {Opt_inode32, "inode32"}, /* inode allocation limited to - * XFS_MAXINUMBER_32 */ - {Opt_ikeep, "ikeep"}, /* do not free empty inode clusters */ - {Opt_noikeep, "noikeep"}, /* free empty inode clusters */ - {Opt_largeio, "largeio"}, /* report large I/O sizes in stat() */ - {Opt_nolargeio, "nolargeio"}, /* do not report large I/O sizes - * in stat(). */ - {Opt_attr2, "attr2"}, /* do use attr2 attribute format */ - {Opt_noattr2, "noattr2"}, /* do not use attr2 attribute format */ - {Opt_filestreams,"filestreams"},/* use filestreams allocator */ - {Opt_quota, "quota"}, /* disk quotas (user) */ - {Opt_noquota, "noquota"}, /* no quotas */ - {Opt_usrquota, "usrquota"}, /* user quota enabled */ - {Opt_grpquota, "grpquota"}, /* group quota enabled */ - {Opt_prjquota, "prjquota"}, /* project quota enabled */ - {Opt_uquota, "uquota"}, /* user quota (IRIX variant) */ - {Opt_gquota, "gquota"}, /* group quota (IRIX variant) */ - {Opt_pquota, "pquota"}, /* project quota (IRIX variant) */ - {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */ - {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */ - {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */ - {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */ - {Opt_discard, "discard"}, /* Discard unused blocks */ - {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ - {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ - {Opt_err, NULL}, +static const struct fs_parameter_spec xfs_param_specs[] = { + fsparam_u32("logbufs", Opt_logbufs), + fsparam_string("logbsize", Opt_logbsize), + fsparam_string("logdev", Opt_logdev), + fsparam_string("rtdev", Opt_rtdev), + fsparam_flag("wsync", Opt_wsync), + fsparam_flag("noalign", Opt_noalign), + fsparam_flag("swalloc", Opt_swalloc), + fsparam_u32("sunit", Opt_sunit), + fsparam_u32("swidth", Opt_swidth), + fsparam_flag("nouuid", Opt_nouuid), + fsparam_flag("grpid", Opt_grpid), + fsparam_flag("nogrpid", Opt_nogrpid), + fsparam_flag("bsdgroups", Opt_bsdgroups), + fsparam_flag("sysvgroups", Opt_sysvgroups), + fsparam_string("allocsize", Opt_allocsize), + fsparam_flag("norecovery", Opt_norecovery), + fsparam_flag("inode64", Opt_inode64), + fsparam_flag("inode32", Opt_inode32), + fsparam_flag("ikeep", Opt_ikeep), + fsparam_flag("noikeep", Opt_noikeep), + fsparam_flag("largeio", Opt_largeio), + fsparam_flag("nolargeio", Opt_nolargeio), + fsparam_flag("attr2", Opt_attr2), + fsparam_flag("noattr2", Opt_noattr2), + fsparam_flag("filestreams", Opt_filestreams), + fsparam_flag("quota", Opt_quota), + fsparam_flag("noquota", Opt_noquota), + fsparam_flag("usrquota", Opt_usrquota), + fsparam_flag("grpquota", Opt_grpquota), + fsparam_flag("prjquota", Opt_prjquota), + fsparam_flag("uquota", Opt_uquota), + fsparam_flag("gquota", Opt_gquota), + fsparam_flag("pquota", Opt_pquota), + fsparam_flag("uqnoenforce", Opt_uqnoenforce), + fsparam_flag("gqnoenforce", Opt_gqnoenforce), + fsparam_flag("pqnoenforce", Opt_pqnoenforce), + fsparam_flag("qnoenforce", Opt_qnoenforce), + fsparam_flag("discard", Opt_discard), + fsparam_flag("nodiscard", Opt_nodiscard), + fsparam_flag("dax", Opt_dax), + {} }; - -STATIC int -suffix_kstrtoint(const substring_t *s, unsigned int base, int *res) -{ - int last, shift_left_factor = 0, _res; - char *value; - int ret = 0; - - value = match_strdup(s); - if (!value) - return -ENOMEM; - - last = strlen(value) - 1; - if (value[last] == 'K' || value[last] == 'k') { - shift_left_factor = 10; - value[last] = '\0'; - } - if (value[last] == 'M' || value[last] == 'm') { - shift_left_factor = 20; - value[last] = '\0'; - } - if (value[last] == 'G' || value[last] == 'g') { - shift_left_factor = 30; - value[last] = '\0'; - } - - if (kstrtoint(value, base, &_res)) - ret = -EINVAL; - kfree(value); - *res = _res << shift_left_factor; - return ret; -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - * - * Note that this function leaks the various device name allocations on - * failure. The caller takes care of them. - * - * *sb is const because this is also used to test options on the remount - * path, and we don't want this to have any side effects at remount time. - * Today this function does not change *sb, but just to future-proof... - */ -STATIC int -xfs_parseargs( - struct xfs_mount *mp, - char *options) -{ - const struct super_block *sb = mp->m_super; - char *p; - substring_t args[MAX_OPT_ARGS]; - int dsunit = 0; - int dswidth = 0; - int iosize = 0; - uint8_t iosizelog = 0; - - /* - * set up the mount name first so all the errors will refer to the - * correct device. - */ - mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_fsname) - return -ENOMEM; - mp->m_fsname_len = strlen(mp->m_fsname) + 1; - - /* - * Copy binary VFS mount flags we are interested in. - */ - if (sb_rdonly(sb)) - mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & SB_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & SB_SYNCHRONOUS) - mp->m_flags |= XFS_MOUNT_WSYNC; - - /* - * Set some default flags that could be cleared by the mount option - * parsing. - */ - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - - /* - * These can be overridden by the mount option parsing. - */ - mp->m_logbufs = -1; - mp->m_logbsize = -1; - - if (!options) - goto done; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_logbufs: - if (match_int(args, &mp->m_logbufs)) - return -EINVAL; - break; - case Opt_logbsize: - if (suffix_kstrtoint(args, 10, &mp->m_logbsize)) - return -EINVAL; - break; - case Opt_logdev: - kfree(mp->m_logname); - mp->m_logname = match_strdup(args); - if (!mp->m_logname) - return -ENOMEM; - break; - case Opt_rtdev: - kfree(mp->m_rtname); - mp->m_rtname = match_strdup(args); - if (!mp->m_rtname) - return -ENOMEM; - break; - case Opt_allocsize: - case Opt_biosize: - if (suffix_kstrtoint(args, 10, &iosize)) - return -EINVAL; - iosizelog = ffs(iosize) - 1; - break; - case Opt_grpid: - case Opt_bsdgroups: - mp->m_flags |= XFS_MOUNT_GRPID; - break; - case Opt_nogrpid: - case Opt_sysvgroups: - mp->m_flags &= ~XFS_MOUNT_GRPID; - break; - case Opt_wsync: - mp->m_flags |= XFS_MOUNT_WSYNC; - break; - case Opt_norecovery: - mp->m_flags |= XFS_MOUNT_NORECOVERY; - break; - case Opt_noalign: - mp->m_flags |= XFS_MOUNT_NOALIGN; - break; - case Opt_swalloc: - mp->m_flags |= XFS_MOUNT_SWALLOC; - break; - case Opt_sunit: - if (match_int(args, &dsunit)) - return -EINVAL; - break; - case Opt_swidth: - if (match_int(args, &dswidth)) - return -EINVAL; - break; - case Opt_inode32: - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - break; - case Opt_inode64: - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; - break; - case Opt_nouuid: - mp->m_flags |= XFS_MOUNT_NOUUID; - break; - case Opt_ikeep: - mp->m_flags |= XFS_MOUNT_IKEEP; - break; - case Opt_noikeep: - mp->m_flags &= ~XFS_MOUNT_IKEEP; - break; - case Opt_largeio: - mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; - break; - case Opt_nolargeio: - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - break; - case Opt_attr2: - mp->m_flags |= XFS_MOUNT_ATTR2; - break; - case Opt_noattr2: - mp->m_flags &= ~XFS_MOUNT_ATTR2; - mp->m_flags |= XFS_MOUNT_NOATTR2; - break; - case Opt_filestreams: - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - break; - case Opt_noquota: - mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; - mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; - mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; - break; - case Opt_quota: - case Opt_uquota: - case Opt_usrquota: - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_UQUOTA_ENFD); - break; - case Opt_qnoenforce: - case Opt_uqnoenforce: - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_UQUOTA_ENFD; - break; - case Opt_pquota: - case Opt_prjquota: - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_PQUOTA_ENFD); - break; - case Opt_pqnoenforce: - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_PQUOTA_ENFD; - break; - case Opt_gquota: - case Opt_grpquota: - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_GQUOTA_ENFD); - break; - case Opt_gqnoenforce: - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_GQUOTA_ENFD; - break; - case Opt_discard: - mp->m_flags |= XFS_MOUNT_DISCARD; - break; - case Opt_nodiscard: - mp->m_flags &= ~XFS_MOUNT_DISCARD; - break; -#ifdef CONFIG_FS_DAX - case Opt_dax: - mp->m_flags |= XFS_MOUNT_DAX; - break; -#endif - default: - xfs_warn(mp, "unknown mount option [%s].", p); - return -EINVAL; - } - } - - /* - * no recovery flag requires a read-only mount - */ - if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && - !(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, "no-recovery mounts must be read-only."); - return -EINVAL; - } - - if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { - xfs_warn(mp, - "sunit and swidth options incompatible with the noalign option"); - return -EINVAL; - } - -#ifndef CONFIG_XFS_QUOTA - if (XFS_IS_QUOTA_RUNNING(mp)) { - xfs_warn(mp, "quota support not available in this kernel."); - return -EINVAL; - } -#endif - - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - xfs_warn(mp, "sunit and swidth must be specified together"); - return -EINVAL; - } - - if (dsunit && (dswidth % dsunit != 0)) { - xfs_warn(mp, - "stripe width (%d) must be a multiple of the stripe unit (%d)", - dswidth, dsunit); - return -EINVAL; - } - -done: - if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - mp->m_dalign = dsunit; - mp->m_swidth = dswidth; - } - - if (mp->m_logbufs != -1 && - mp->m_logbufs != 0 && - (mp->m_logbufs < XLOG_MIN_ICLOGS || - mp->m_logbufs > XLOG_MAX_ICLOGS)) { - xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", - mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return -EINVAL; - } - if (mp->m_logbsize != -1 && - mp->m_logbsize != 0 && - (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || - mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(mp->m_logbsize))) { - xfs_warn(mp, - "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - mp->m_logbsize); - return -EINVAL; - } - - if (iosizelog) { - if (iosizelog > XFS_MAX_IO_LOG || - iosizelog < XFS_MIN_IO_LOG) { - xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", - iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return -EINVAL; - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = iosizelog; - mp->m_writeio_log = iosizelog; - } - - return 0; -} +static const struct fs_parameter_description xfs_fs_parameters = { + .name = "xfs", + .specs = xfs_param_specs, +}; struct proc_xfs_info { uint64_t flag; char *str; }; -STATIC void -xfs_showargs( - struct xfs_mount *mp, - struct seq_file *m) +static int +xfs_fs_show_options( + struct seq_file *m, + struct dentry *root) { static struct proc_xfs_info xfs_info_set[] = { /* the few simple ones we can get from the mount struct */ @@ -447,30 +133,24 @@ xfs_showargs( { XFS_MOUNT_FILESTREAMS, ",filestreams" }, { XFS_MOUNT_GRPID, ",grpid" }, { XFS_MOUNT_DISCARD, ",discard" }, - { XFS_MOUNT_SMALL_INUMS, ",inode32" }, + { XFS_MOUNT_LARGEIO, ",largeio" }, { XFS_MOUNT_DAX, ",dax" }, { 0, NULL } }; - static struct proc_xfs_info xfs_info_unset[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_COMPAT_IOSIZE, ",largeio" }, - { XFS_MOUNT_SMALL_INUMS, ",inode64" }, - { 0, NULL } - }; + struct xfs_mount *mp = XFS_M(root->d_sb); struct proc_xfs_info *xfs_infop; for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { if (mp->m_flags & xfs_infop->flag) seq_puts(m, xfs_infop->str); } - for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { - if (!(mp->m_flags & xfs_infop->flag)) - seq_puts(m, xfs_infop->str); - } - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + seq_printf(m, ",inode%d", + (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64); + + if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) seq_printf(m, ",allocsize=%dk", - (int)(1 << mp->m_writeio_log) >> 10); + (1 << mp->m_allocsize_log) >> 10); if (mp->m_logbufs > 0) seq_printf(m, ",logbufs=%d", mp->m_logbufs); @@ -509,6 +189,8 @@ xfs_showargs( if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, ",noquota"); + + return 0; } static uint64_t @@ -807,33 +489,33 @@ xfs_init_mount_workqueues( struct xfs_mount *mp) { mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_super->s_id); if (!mp->m_buf_workqueue) goto out; mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id); if (!mp->m_unwritten_workqueue) goto out_destroy_buf; mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, - 0, mp->m_fsname); + 0, mp->m_super->s_id); if (!mp->m_cil_workqueue) goto out_destroy_unwritten; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id); if (!mp->m_reclaim_workqueue) goto out_destroy_cil; mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id); if (!mp->m_eofblocks_workqueue) goto out_destroy_reclaim; mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, - mp->m_fsname); + mp->m_super->s_id); if (!mp->m_sync_workqueue) goto out_destroy_eofb; @@ -1037,13 +719,13 @@ xfs_fs_drop_inode( return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); } -STATIC void -xfs_free_fsname( +static void +xfs_mount_free( struct xfs_mount *mp) { - kfree(mp->m_fsname); kfree(mp->m_rtname); kfree(mp->m_logname); + kmem_free(mp); } STATIC int @@ -1204,181 +886,6 @@ xfs_quiesce_attr( xfs_log_quiesce(mp); } -STATIC int -xfs_test_remount_options( - struct super_block *sb, - char *options) -{ - int error = 0; - struct xfs_mount *tmp_mp; - - tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL); - if (!tmp_mp) - return -ENOMEM; - - tmp_mp->m_super = sb; - error = xfs_parseargs(tmp_mp, options); - xfs_free_fsname(tmp_mp); - kmem_free(tmp_mp); - - return error; -} - -STATIC int -xfs_fs_remount( - struct super_block *sb, - int *flags, - char *options) -{ - struct xfs_mount *mp = XFS_M(sb); - xfs_sb_t *sbp = &mp->m_sb; - substring_t args[MAX_OPT_ARGS]; - char *p; - int error; - - /* First, check for complete junk; i.e. invalid options */ - error = xfs_test_remount_options(sb, options); - if (error) - return error; - - sync_filesystem(sb); - while ((p = strsep(&options, ",")) != NULL) { - int token; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_inode64: - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); - break; - case Opt_inode32: - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); - break; - default: - /* - * Logically we would return an error here to prevent - * users from believing they might have changed - * mount options using remount which can't be changed. - * - * But unfortunately mount(8) adds all options from - * mtab and fstab to the mount arguments in some cases - * so we can't blindly reject options, but have to - * check for each specified option if it actually - * differs from the currently set option and only - * reject it if that's the case. - * - * Until that is implemented we return success for - * every remount request, and silently ignore all - * options that we can't actually change. - */ -#if 0 - xfs_info(mp, - "mount option \"%s\" not supported for remount", p); - return -EINVAL; -#else - break; -#endif - } - } - - /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { - if (mp->m_flags & XFS_MOUNT_NORECOVERY) { - xfs_warn(mp, - "ro->rw transition prohibited on norecovery mount"); - return -EINVAL; - } - - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - xfs_sb_has_ro_compat_feature(sbp, - XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { - xfs_warn(mp, -"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", - (sbp->sb_features_ro_compat & - XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); - return -EINVAL; - } - - mp->m_flags &= ~XFS_MOUNT_RDONLY; - - /* - * If this is the first remount to writeable state we - * might have some superblock changes to update. - */ - if (mp->m_update_sb) { - error = xfs_sync_sb(mp, false); - if (error) { - xfs_warn(mp, "failed to write sb changes"); - return error; - } - mp->m_update_sb = false; - } - - /* - * Fill out the reserve pool if it is empty. Use the stashed - * value if it is non-zero, otherwise go with the default. - */ - xfs_restore_resvblks(mp); - xfs_log_work_queue(mp); - - /* Recover any CoW blocks that never got remapped. */ - error = xfs_reflink_recover_cow(mp); - if (error) { - xfs_err(mp, - "Error %d recovering leftover CoW allocations.", error); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } - xfs_start_block_reaping(mp); - - /* Create the per-AG metadata reservation pool .*/ - error = xfs_fs_reserve_ag_blocks(mp); - if (error && error != -ENOSPC) - return error; - } - - /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { - /* - * Cancel background eofb scanning so it cannot race with the - * final log force+buftarg wait and deadlock the remount. - */ - xfs_stop_block_reaping(mp); - - /* Get rid of any leftover CoW reservations... */ - error = xfs_icache_free_cowblocks(mp, NULL); - if (error) { - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } - - /* Free the per-AG metadata reservation pool. */ - error = xfs_fs_unreserve_ag_blocks(mp); - if (error) { - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } - - /* - * Before we sync the metadata, we need to free up the reserve - * block pool so that the used block count in the superblock on - * disk is correct at the end of the remount. Stash the current - * reserve pool size so that if we get remounted rw, we can - * return it to the same size. - */ - xfs_save_resvblks(mp); - - xfs_quiesce_attr(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; - } - - return 0; -} - /* * Second stage of a freeze. The data is already frozen so we only * need to take care of the metadata. Once that's done sync the superblock @@ -1409,15 +916,6 @@ xfs_fs_unfreeze( return 0; } -STATIC int -xfs_fs_show_options( - struct seq_file *m, - struct dentry *root) -{ - xfs_showargs(XFS_M(root->d_sb), m); - return 0; -} - /* * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in. @@ -1540,60 +1038,337 @@ xfs_destroy_percpu_counters( percpu_counter_destroy(&mp->m_delalloc_blks); } -static struct xfs_mount * -xfs_mount_alloc( +static void +xfs_fs_put_super( struct super_block *sb) { - struct xfs_mount *mp; + struct xfs_mount *mp = XFS_M(sb); - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); - if (!mp) - return NULL; + /* if ->fill_super failed, we have no mount to tear down */ + if (!sb->s_fs_info) + return; - mp->m_super = sb; - spin_lock_init(&mp->m_sb_lock); - spin_lock_init(&mp->m_agirotor_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); - spin_lock_init(&mp->m_perag_lock); - mutex_init(&mp->m_growlock); - atomic_set(&mp->m_active_trans, 0); - INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); - INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); - INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); - mp->m_kobj.kobject.kset = xfs_kset; - /* - * We don't create the finobt per-ag space reservation until after log - * recovery, so we must set this to true so that an ifree transaction - * started during log recovery will not depend on space reservations - * for finobt expansion. - */ - mp->m_finobt_nores = true; - return mp; + xfs_notice(mp, "Unmounting Filesystem"); + xfs_filestream_unmount(mp); + xfs_unmountfs(mp); + + xfs_freesb(mp); + free_percpu(mp->m_stats.xs_stats); + xfs_destroy_percpu_counters(mp); + xfs_destroy_mount_workqueues(mp); + xfs_close_devices(mp); + + sb->s_fs_info = NULL; + xfs_mount_free(mp); } +static long +xfs_fs_nr_cached_objects( + struct super_block *sb, + struct shrink_control *sc) +{ + /* Paranoia: catch incorrect calls during mount setup or teardown */ + if (WARN_ON_ONCE(!sb->s_fs_info)) + return 0; + return xfs_reclaim_inodes_count(XFS_M(sb)); +} -STATIC int -xfs_fs_fill_super( +static long +xfs_fs_free_cached_objects( struct super_block *sb, - void *data, - int silent) + struct shrink_control *sc) { - struct inode *root; - struct xfs_mount *mp = NULL; - int flags = 0, error = -ENOMEM; + return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); +} + +static const struct super_operations xfs_super_operations = { + .alloc_inode = xfs_fs_alloc_inode, + .destroy_inode = xfs_fs_destroy_inode, + .dirty_inode = xfs_fs_dirty_inode, + .drop_inode = xfs_fs_drop_inode, + .put_super = xfs_fs_put_super, + .sync_fs = xfs_fs_sync_fs, + .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, + .statfs = xfs_fs_statfs, + .show_options = xfs_fs_show_options, + .nr_cached_objects = xfs_fs_nr_cached_objects, + .free_cached_objects = xfs_fs_free_cached_objects, +}; + +static int +suffix_kstrtoint( + const char *s, + unsigned int base, + int *res) +{ + int last, shift_left_factor = 0, _res; + char *value; + int ret = 0; + + value = kstrdup(s, GFP_KERNEL); + if (!value) + return -ENOMEM; + + last = strlen(value) - 1; + if (value[last] == 'K' || value[last] == 'k') { + shift_left_factor = 10; + value[last] = '\0'; + } + if (value[last] == 'M' || value[last] == 'm') { + shift_left_factor = 20; + value[last] = '\0'; + } + if (value[last] == 'G' || value[last] == 'g') { + shift_left_factor = 30; + value[last] = '\0'; + } + + if (kstrtoint(value, base, &_res)) + ret = -EINVAL; + kfree(value); + *res = _res << shift_left_factor; + return ret; +} + +/* + * Set mount state from a mount option. + * + * NOTE: mp->m_super is NULL here! + */ +static int +xfs_fc_parse_param( + struct fs_context *fc, + struct fs_parameter *param) +{ + struct xfs_mount *mp = fc->s_fs_info; + struct fs_parse_result result; + int size = 0; + int opt; + + opt = fs_parse(fc, &xfs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_logbufs: + mp->m_logbufs = result.uint_32; + return 0; + case Opt_logbsize: + if (suffix_kstrtoint(param->string, 10, &mp->m_logbsize)) + return -EINVAL; + return 0; + case Opt_logdev: + kfree(mp->m_logname); + mp->m_logname = kstrdup(param->string, GFP_KERNEL); + if (!mp->m_logname) + return -ENOMEM; + return 0; + case Opt_rtdev: + kfree(mp->m_rtname); + mp->m_rtname = kstrdup(param->string, GFP_KERNEL); + if (!mp->m_rtname) + return -ENOMEM; + return 0; + case Opt_allocsize: + if (suffix_kstrtoint(param->string, 10, &size)) + return -EINVAL; + mp->m_allocsize_log = ffs(size) - 1; + mp->m_flags |= XFS_MOUNT_ALLOCSIZE; + return 0; + case Opt_grpid: + case Opt_bsdgroups: + mp->m_flags |= XFS_MOUNT_GRPID; + return 0; + case Opt_nogrpid: + case Opt_sysvgroups: + mp->m_flags &= ~XFS_MOUNT_GRPID; + return 0; + case Opt_wsync: + mp->m_flags |= XFS_MOUNT_WSYNC; + return 0; + case Opt_norecovery: + mp->m_flags |= XFS_MOUNT_NORECOVERY; + return 0; + case Opt_noalign: + mp->m_flags |= XFS_MOUNT_NOALIGN; + return 0; + case Opt_swalloc: + mp->m_flags |= XFS_MOUNT_SWALLOC; + return 0; + case Opt_sunit: + mp->m_dalign = result.uint_32; + return 0; + case Opt_swidth: + mp->m_swidth = result.uint_32; + return 0; + case Opt_inode32: + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + return 0; + case Opt_inode64: + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + return 0; + case Opt_nouuid: + mp->m_flags |= XFS_MOUNT_NOUUID; + return 0; + case Opt_ikeep: + mp->m_flags |= XFS_MOUNT_IKEEP; + return 0; + case Opt_noikeep: + mp->m_flags &= ~XFS_MOUNT_IKEEP; + return 0; + case Opt_largeio: + mp->m_flags |= XFS_MOUNT_LARGEIO; + return 0; + case Opt_nolargeio: + mp->m_flags &= ~XFS_MOUNT_LARGEIO; + return 0; + case Opt_attr2: + mp->m_flags |= XFS_MOUNT_ATTR2; + return 0; + case Opt_noattr2: + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; + return 0; + case Opt_filestreams: + mp->m_flags |= XFS_MOUNT_FILESTREAMS; + return 0; + case Opt_noquota: + mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; + mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; + mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; + return 0; + case Opt_quota: + case Opt_uquota: + case Opt_usrquota: + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); + return 0; + case Opt_qnoenforce: + case Opt_uqnoenforce: + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_UQUOTA_ENFD; + return 0; + case Opt_pquota: + case Opt_prjquota: + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_PQUOTA_ENFD); + return 0; + case Opt_pqnoenforce: + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_PQUOTA_ENFD; + return 0; + case Opt_gquota: + case Opt_grpquota: + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_GQUOTA_ENFD); + return 0; + case Opt_gqnoenforce: + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_GQUOTA_ENFD; + return 0; + case Opt_discard: + mp->m_flags |= XFS_MOUNT_DISCARD; + return 0; + case Opt_nodiscard: + mp->m_flags &= ~XFS_MOUNT_DISCARD; + return 0; +#ifdef CONFIG_FS_DAX + case Opt_dax: + mp->m_flags |= XFS_MOUNT_DAX; + return 0; +#endif + default: + xfs_warn(mp, "unknown mount option [%s].", param->key); + return -EINVAL; + } + return 0; +} + +static int +xfs_fc_validate_params( + struct xfs_mount *mp) +{ /* - * allocate mp and do all low-level struct initializations before we - * attach it to the super + * no recovery flag requires a read-only mount */ - mp = xfs_mount_alloc(sb); - if (!mp) - goto out; - sb->s_fs_info = mp; + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return -EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && + (mp->m_dalign || mp->m_swidth)) { + xfs_warn(mp, + "sunit and swidth options incompatible with the noalign option"); + return -EINVAL; + } + + if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) { + xfs_warn(mp, "quota support not available in this kernel."); + return -EINVAL; + } + + if ((mp->m_dalign && !mp->m_swidth) || + (!mp->m_dalign && mp->m_swidth)) { + xfs_warn(mp, "sunit and swidth must be specified together"); + return -EINVAL; + } + + if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { + xfs_warn(mp, + "stripe width (%d) must be a multiple of the stripe unit (%d)", + mp->m_swidth, mp->m_dalign); + return -EINVAL; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return -EINVAL; + } + + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + xfs_warn(mp, + "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return -EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) && + (mp->m_allocsize_log > XFS_MAX_IO_LOG || + mp->m_allocsize_log < XFS_MIN_IO_LOG)) { + xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", + mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); + return -EINVAL; + } + + return 0; +} + +static int +xfs_fc_fill_super( + struct super_block *sb, + struct fs_context *fc) +{ + struct xfs_mount *mp = sb->s_fs_info; + struct inode *root; + int flags = 0, error; + + mp->m_super = sb; - error = xfs_parseargs(mp, (char *)data); + error = xfs_fc_validate_params(mp); if (error) - goto out_free_fsname; + goto out_free_names; sb_min_blocksize(sb, BBSIZE); sb->s_xattr = xfs_xattr_handlers; @@ -1615,12 +1390,12 @@ xfs_fs_fill_super( msleep(xfs_globals.mount_delay * 1000); } - if (silent) + if (fc->sb_flags & SB_SILENT) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_free_fsname; + goto out_free_names; error = xfs_init_mount_workqueues(mp); if (error) @@ -1757,11 +1532,9 @@ xfs_fs_fill_super( xfs_destroy_mount_workqueues(mp); out_close_devices: xfs_close_devices(mp); - out_free_fsname: + out_free_names: sb->s_fs_info = NULL; - xfs_free_fsname(mp); - kfree(mp); - out: + xfs_mount_free(mp); return error; out_unmount: @@ -1770,80 +1543,252 @@ xfs_fs_fill_super( goto out_free_sb; } -STATIC void -xfs_fs_put_super( - struct super_block *sb) +static int +xfs_fc_get_tree( + struct fs_context *fc) { - struct xfs_mount *mp = XFS_M(sb); + return get_tree_bdev(fc, xfs_fc_fill_super); +} - /* if ->fill_super failed, we have no mount to tear down */ - if (!sb->s_fs_info) - return; +static int +xfs_remount_rw( + struct xfs_mount *mp) +{ + struct xfs_sb *sbp = &mp->m_sb; + int error; - xfs_notice(mp, "Unmounting Filesystem"); - xfs_filestream_unmount(mp); - xfs_unmountfs(mp); + if (mp->m_flags & XFS_MOUNT_NORECOVERY) { + xfs_warn(mp, + "ro->rw transition prohibited on norecovery mount"); + return -EINVAL; + } - xfs_freesb(mp); - free_percpu(mp->m_stats.xs_stats); - xfs_destroy_percpu_counters(mp); - xfs_destroy_mount_workqueues(mp); - xfs_close_devices(mp); + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + xfs_warn(mp, + "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", + (sbp->sb_features_ro_compat & + XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); + return -EINVAL; + } - sb->s_fs_info = NULL; - xfs_free_fsname(mp); - kfree(mp); + mp->m_flags &= ~XFS_MOUNT_RDONLY; + + /* + * If this is the first remount to writeable state we might have some + * superblock changes to update. + */ + if (mp->m_update_sb) { + error = xfs_sync_sb(mp, false); + if (error) { + xfs_warn(mp, "failed to write sb changes"); + return error; + } + mp->m_update_sb = false; + } + + /* + * Fill out the reserve pool if it is empty. Use the stashed value if + * it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); + xfs_log_work_queue(mp); + + /* Recover any CoW blocks that never got remapped. */ + error = xfs_reflink_recover_cow(mp); + if (error) { + xfs_err(mp, + "Error %d recovering leftover CoW allocations.", error); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + xfs_start_block_reaping(mp); + + /* Create the per-AG metadata reservation pool .*/ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error != -ENOSPC) + return error; + + return 0; } -STATIC struct dentry * -xfs_fs_mount( - struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) +static int +xfs_remount_ro( + struct xfs_mount *mp) { - return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); + int error; + + /* + * Cancel background eofb scanning so it cannot race with the final + * log force+buftarg wait and deadlock the remount. + */ + xfs_stop_block_reaping(mp); + + /* Get rid of any leftover CoW reservations... */ + error = xfs_icache_free_cowblocks(mp, NULL); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + + /* Free the per-AG metadata reservation pool. */ + error = xfs_fs_unreserve_ag_blocks(mp); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + + /* + * Before we sync the metadata, we need to free up the reserve block + * pool so that the used block count in the superblock on disk is + * correct at the end of the remount. Stash the current* reserve pool + * size so that if we get remounted rw, we can return it to the same + * size. + */ + xfs_save_resvblks(mp); + + xfs_quiesce_attr(mp); + mp->m_flags |= XFS_MOUNT_RDONLY; + + return 0; } -static long -xfs_fs_nr_cached_objects( - struct super_block *sb, - struct shrink_control *sc) +/* + * Logically we would return an error here to prevent users from believing + * they might have changed mount options using remount which can't be changed. + * + * But unfortunately mount(8) adds all options from mtab and fstab to the mount + * arguments in some cases so we can't blindly reject options, but have to + * check for each specified option if it actually differs from the currently + * set option and only reject it if that's the case. + * + * Until that is implemented we return success for every remount request, and + * silently ignore all options that we can't actually change. + */ +static int +xfs_fc_reconfigure( + struct fs_context *fc) { - /* Paranoia: catch incorrect calls during mount setup or teardown */ - if (WARN_ON_ONCE(!sb->s_fs_info)) - return 0; - return xfs_reclaim_inodes_count(XFS_M(sb)); + struct xfs_mount *mp = XFS_M(fc->root->d_sb); + struct xfs_mount *new_mp = fc->s_fs_info; + xfs_sb_t *sbp = &mp->m_sb; + int flags = fc->sb_flags; + int error; + + error = xfs_fc_validate_params(new_mp); + if (error) + return error; + + sync_filesystem(mp->m_super); + + /* inode32 -> inode64 */ + if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && + !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + } + + /* inode64 -> inode32 */ + if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) && + (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + } + + /* ro -> rw */ + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) { + error = xfs_remount_rw(mp); + if (error) + return error; + } + + /* rw -> ro */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) { + error = xfs_remount_ro(mp); + if (error) + return error; + } + + return 0; } -static long -xfs_fs_free_cached_objects( - struct super_block *sb, - struct shrink_control *sc) +static void xfs_fc_free( + struct fs_context *fc) { - return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); + struct xfs_mount *mp = fc->s_fs_info; + + /* + * mp is stored in the fs_context when it is initialized. + * mp is transferred to the superblock on a successful mount, + * but if an error occurs before the transfer we have to free + * it here. + */ + if (mp) + xfs_mount_free(mp); } -static const struct super_operations xfs_super_operations = { - .alloc_inode = xfs_fs_alloc_inode, - .destroy_inode = xfs_fs_destroy_inode, - .dirty_inode = xfs_fs_dirty_inode, - .drop_inode = xfs_fs_drop_inode, - .put_super = xfs_fs_put_super, - .sync_fs = xfs_fs_sync_fs, - .freeze_fs = xfs_fs_freeze, - .unfreeze_fs = xfs_fs_unfreeze, - .statfs = xfs_fs_statfs, - .remount_fs = xfs_fs_remount, - .show_options = xfs_fs_show_options, - .nr_cached_objects = xfs_fs_nr_cached_objects, - .free_cached_objects = xfs_fs_free_cached_objects, +static const struct fs_context_operations xfs_context_ops = { + .parse_param = xfs_fc_parse_param, + .get_tree = xfs_fc_get_tree, + .reconfigure = xfs_fc_reconfigure, + .free = xfs_fc_free, }; +static int xfs_init_fs_context( + struct fs_context *fc) +{ + struct xfs_mount *mp; + + mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO); + if (!mp) + return -ENOMEM; + + spin_lock_init(&mp->m_sb_lock); + spin_lock_init(&mp->m_agirotor_lock); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); + spin_lock_init(&mp->m_perag_lock); + mutex_init(&mp->m_growlock); + atomic_set(&mp->m_active_trans, 0); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); + INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); + mp->m_kobj.kobject.kset = xfs_kset; + /* + * We don't create the finobt per-ag space reservation until after log + * recovery, so we must set this to true so that an ifree transaction + * started during log recovery will not depend on space reservations + * for finobt expansion. + */ + mp->m_finobt_nores = true; + + /* + * These can be overridden by the mount option parsing. + */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; + mp->m_allocsize_log = 16; /* 64k */ + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (fc->sb_flags & SB_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (fc->sb_flags & SB_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + fc->s_fs_info = mp; + fc->ops = &xfs_context_ops; + + return 0; +} + static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", - .mount = xfs_fs_mount, + .init_fs_context = xfs_init_fs_context, + .parameters = &xfs_fs_parameters, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -1852,32 +1797,39 @@ MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) { - xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), - "xfs_log_ticket"); + xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket", + sizeof(struct xlog_ticket), + 0, 0, NULL); if (!xfs_log_ticket_zone) goto out; - xfs_bmap_free_item_zone = kmem_zone_init( - sizeof(struct xfs_extent_free_item), - "xfs_bmap_free_item"); + xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item", + sizeof(struct xfs_extent_free_item), + 0, 0, NULL); if (!xfs_bmap_free_item_zone) goto out_destroy_log_ticket_zone; - xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), - "xfs_btree_cur"); + xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur", + sizeof(struct xfs_btree_cur), + 0, 0, NULL); if (!xfs_btree_cur_zone) goto out_destroy_bmap_free_item_zone; - xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), - "xfs_da_state"); + xfs_da_state_zone = kmem_cache_create("xfs_da_state", + sizeof(struct xfs_da_state), + 0, 0, NULL); if (!xfs_da_state_zone) goto out_destroy_btree_cur_zone; - xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork"); + xfs_ifork_zone = kmem_cache_create("xfs_ifork", + sizeof(struct xfs_ifork), + 0, 0, NULL); if (!xfs_ifork_zone) goto out_destroy_da_state_zone; - xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); + xfs_trans_zone = kmem_cache_create("xf_trans", + sizeof(struct xfs_trans), + 0, 0, NULL); if (!xfs_trans_zone) goto out_destroy_ifork_zone; @@ -1887,109 +1839,121 @@ xfs_init_zones(void) * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ - xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item), - "xfs_buf_item"); + xfs_buf_item_zone = kmem_cache_create("xfs_buf_item", + sizeof(struct xfs_buf_log_item), + 0, 0, NULL); if (!xfs_buf_item_zone) goto out_destroy_trans_zone; - xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + - ((XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efd_item"); + xfs_efd_zone = kmem_cache_create("xfs_efd_item", + (sizeof(struct xfs_efd_log_item) + + (XFS_EFD_MAX_FAST_EXTENTS - 1) * + sizeof(struct xfs_extent)), + 0, 0, NULL); if (!xfs_efd_zone) goto out_destroy_buf_item_zone; - xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + - ((XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efi_item"); + xfs_efi_zone = kmem_cache_create("xfs_efi_item", + (sizeof(struct xfs_efi_log_item) + + (XFS_EFI_MAX_FAST_EXTENTS - 1) * + sizeof(struct xfs_extent)), + 0, 0, NULL); if (!xfs_efi_zone) goto out_destroy_efd_zone; - xfs_inode_zone = - kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD | - KM_ZONE_ACCOUNT, xfs_fs_inode_init_once); + xfs_inode_zone = kmem_cache_create("xfs_inode", + sizeof(struct xfs_inode), 0, + (SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD | SLAB_ACCOUNT), + xfs_fs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; - xfs_ili_zone = - kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", - KM_ZONE_SPREAD, NULL); + xfs_ili_zone = kmem_cache_create("xfs_ili", + sizeof(struct xfs_inode_log_item), 0, + SLAB_MEM_SPREAD, NULL); if (!xfs_ili_zone) goto out_destroy_inode_zone; - xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item), - "xfs_icr"); + + xfs_icreate_zone = kmem_cache_create("xfs_icr", + sizeof(struct xfs_icreate_item), + 0, 0, NULL); if (!xfs_icreate_zone) goto out_destroy_ili_zone; - xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item), - "xfs_rud_item"); + xfs_rud_zone = kmem_cache_create("xfs_rud_item", + sizeof(struct xfs_rud_log_item), + 0, 0, NULL); if (!xfs_rud_zone) goto out_destroy_icreate_zone; - xfs_rui_zone = kmem_zone_init( + xfs_rui_zone = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), - "xfs_rui_item"); + 0, 0, NULL); if (!xfs_rui_zone) goto out_destroy_rud_zone; - xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item), - "xfs_cud_item"); + xfs_cud_zone = kmem_cache_create("xfs_cud_item", + sizeof(struct xfs_cud_log_item), + 0, 0, NULL); if (!xfs_cud_zone) goto out_destroy_rui_zone; - xfs_cui_zone = kmem_zone_init( + xfs_cui_zone = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), - "xfs_cui_item"); + 0, 0, NULL); if (!xfs_cui_zone) goto out_destroy_cud_zone; - xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item), - "xfs_bud_item"); + xfs_bud_zone = kmem_cache_create("xfs_bud_item", + sizeof(struct xfs_bud_log_item), + 0, 0, NULL); if (!xfs_bud_zone) goto out_destroy_cui_zone; - xfs_bui_zone = kmem_zone_init( + xfs_bui_zone = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), - "xfs_bui_item"); + 0, 0, NULL); if (!xfs_bui_zone) goto out_destroy_bud_zone; return 0; out_destroy_bud_zone: - kmem_zone_destroy(xfs_bud_zone); + kmem_cache_destroy(xfs_bud_zone); out_destroy_cui_zone: - kmem_zone_destroy(xfs_cui_zone); + kmem_cache_destroy(xfs_cui_zone); out_destroy_cud_zone: - kmem_zone_destroy(xfs_cud_zone); + kmem_cache_destroy(xfs_cud_zone); out_destroy_rui_zone: - kmem_zone_destroy(xfs_rui_zone); + kmem_cache_destroy(xfs_rui_zone); out_destroy_rud_zone: - kmem_zone_destroy(xfs_rud_zone); + kmem_cache_destroy(xfs_rud_zone); out_destroy_icreate_zone: - kmem_zone_destroy(xfs_icreate_zone); + kmem_cache_destroy(xfs_icreate_zone); out_destroy_ili_zone: - kmem_zone_destroy(xfs_ili_zone); + kmem_cache_destroy(xfs_ili_zone); out_destroy_inode_zone: - kmem_zone_destroy(xfs_inode_zone); + kmem_cache_destroy(xfs_inode_zone); out_destroy_efi_zone: - kmem_zone_destroy(xfs_efi_zone); + kmem_cache_destroy(xfs_efi_zone); out_destroy_efd_zone: - kmem_zone_destroy(xfs_efd_zone); + kmem_cache_destroy(xfs_efd_zone); out_destroy_buf_item_zone: - kmem_zone_destroy(xfs_buf_item_zone); + kmem_cache_destroy(xfs_buf_item_zone); out_destroy_trans_zone: - kmem_zone_destroy(xfs_trans_zone); + kmem_cache_destroy(xfs_trans_zone); out_destroy_ifork_zone: - kmem_zone_destroy(xfs_ifork_zone); + kmem_cache_destroy(xfs_ifork_zone); out_destroy_da_state_zone: - kmem_zone_destroy(xfs_da_state_zone); + kmem_cache_destroy(xfs_da_state_zone); out_destroy_btree_cur_zone: - kmem_zone_destroy(xfs_btree_cur_zone); + kmem_cache_destroy(xfs_btree_cur_zone); out_destroy_bmap_free_item_zone: - kmem_zone_destroy(xfs_bmap_free_item_zone); + kmem_cache_destroy(xfs_bmap_free_item_zone); out_destroy_log_ticket_zone: - kmem_zone_destroy(xfs_log_ticket_zone); + kmem_cache_destroy(xfs_log_ticket_zone); out: return -ENOMEM; } @@ -2002,24 +1966,24 @@ xfs_destroy_zones(void) * destroy caches. */ rcu_barrier(); - kmem_zone_destroy(xfs_bui_zone); - kmem_zone_destroy(xfs_bud_zone); - kmem_zone_destroy(xfs_cui_zone); - kmem_zone_destroy(xfs_cud_zone); - kmem_zone_destroy(xfs_rui_zone); - kmem_zone_destroy(xfs_rud_zone); - kmem_zone_destroy(xfs_icreate_zone); - kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_inode_zone); - kmem_zone_destroy(xfs_efi_zone); - kmem_zone_destroy(xfs_efd_zone); - kmem_zone_destroy(xfs_buf_item_zone); - kmem_zone_destroy(xfs_trans_zone); - kmem_zone_destroy(xfs_ifork_zone); - kmem_zone_destroy(xfs_da_state_zone); - kmem_zone_destroy(xfs_btree_cur_zone); - kmem_zone_destroy(xfs_bmap_free_item_zone); - kmem_zone_destroy(xfs_log_ticket_zone); + kmem_cache_destroy(xfs_bui_zone); + kmem_cache_destroy(xfs_bud_zone); + kmem_cache_destroy(xfs_cui_zone); + kmem_cache_destroy(xfs_cud_zone); + kmem_cache_destroy(xfs_rui_zone); + kmem_cache_destroy(xfs_rud_zone); + kmem_cache_destroy(xfs_icreate_zone); + kmem_cache_destroy(xfs_ili_zone); + kmem_cache_destroy(xfs_inode_zone); + kmem_cache_destroy(xfs_efi_zone); + kmem_cache_destroy(xfs_efd_zone); + kmem_cache_destroy(xfs_buf_item_zone); + kmem_cache_destroy(xfs_trans_zone); + kmem_cache_destroy(xfs_ifork_zone); + kmem_cache_destroy(xfs_da_state_zone); + kmem_cache_destroy(xfs_btree_cur_zone); + kmem_cache_destroy(xfs_bmap_free_item_zone); + kmem_cache_destroy(xfs_log_ticket_zone); } STATIC int __init diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 763e43d22dee..b552cf6d3379 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -11,9 +11,11 @@ #ifdef CONFIG_XFS_QUOTA extern int xfs_qm_init(void); extern void xfs_qm_exit(void); +# define XFS_QUOTA_STRING "quota, " #else # define xfs_qm_init() (0) # define xfs_qm_exit() do { } while (0) +# define XFS_QUOTA_STRING #endif #ifdef CONFIG_XFS_POSIX_ACL @@ -50,6 +52,12 @@ extern void xfs_qm_exit(void); # define XFS_WARN_STRING #endif +#ifdef CONFIG_XFS_ASSERT_FATAL +# define XFS_ASSERT_FATAL_STRING "fatal assert, " +#else +# define XFS_ASSERT_FATAL_STRING +#endif + #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -63,6 +71,8 @@ extern void xfs_qm_exit(void); XFS_SCRUB_STRING \ XFS_REPAIR_STRING \ XFS_WARN_STRING \ + XFS_QUOTA_STRING \ + XFS_ASSERT_FATAL_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index ed66fd2de327..a25502bc2071 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -17,6 +17,7 @@ #include "xfs_bmap.h" #include "xfs_bmap_btree.h" #include "xfs_quota.h" +#include "xfs_symlink.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_trans.h" diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index 9743d8c9394b..b1fa091427e6 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -5,7 +5,7 @@ #ifndef __XFS_SYMLINK_H #define __XFS_SYMLINK_H 1 -/* Kernel only symlink defintions */ +/* Kernel only symlink definitions */ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index cbb23d7a3554..c13bb3655e48 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -725,7 +725,7 @@ TRACE_EVENT(xfs_iomap_prealloc_size, __entry->writeio_blocks = writeio_blocks; ), TP_printk("dev %d:%d ino 0x%llx prealloc blocks %llu shift %d " - "m_writeio_blocks %u", + "m_allocsize_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->blocks, __entry->shift, __entry->writeio_blocks) ) @@ -1577,8 +1577,11 @@ DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_near_first); -DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); -DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); +DEFINE_ALLOC_EVENT(xfs_alloc_cur); +DEFINE_ALLOC_EVENT(xfs_alloc_cur_right); +DEFINE_ALLOC_EVENT(xfs_alloc_cur_left); +DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup); +DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup_done); DEFINE_ALLOC_EVENT(xfs_alloc_near_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); @@ -1598,6 +1601,32 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); +TRACE_EVENT(xfs_alloc_cur_check, + TP_PROTO(struct xfs_mount *mp, xfs_btnum_t btnum, xfs_agblock_t bno, + xfs_extlen_t len, xfs_extlen_t diff, bool new), + TP_ARGS(mp, btnum, bno, len, diff, new), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_btnum_t, btnum) + __field(xfs_agblock_t, bno) + __field(xfs_extlen_t, len) + __field(xfs_extlen_t, diff) + __field(bool, new) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->btnum = btnum; + __entry->bno = bno; + __entry->len = len; + __entry->diff = diff; + __entry->new = new; + ), + TP_printk("dev %d:%d btree %s bno 0x%x len 0x%x diff 0x%x new %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), + __entry->bno, __entry->len, __entry->diff, __entry->new) +) + DECLARE_EVENT_CLASS(xfs_da_class, TP_PROTO(struct xfs_da_args *args), TP_ARGS(args), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f4795fdb7389..3b208f9a865c 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -71,7 +71,7 @@ xfs_trans_free( if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); - kmem_zone_free(xfs_trans_zone, tp); + kmem_cache_free(xfs_trans_zone, tp); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 6ccfd75d3c24..00cc5b8734be 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -427,15 +427,15 @@ xfsaild_push( case XFS_ITEM_FLUSHING: /* - * The item or its backing buffer is already beeing + * The item or its backing buffer is already being * flushed. The typical reason for that is that an * inode buffer is locked because we already pushed the * updates to it as part of inode clustering. * * We do not want to to stop flushing just because lots - * of items are already beeing flushed, but we need to + * of items are already being flushed, but we need to * re-try the flushing relatively soon if most of the - * AIL is beeing flushed. + * AIL is being flushed. */ XFS_STATS_INC(mp, xs_push_ail_flushing); trace_xfs_ail_flushing(lip); @@ -612,7 +612,7 @@ xfsaild( * The push is run asynchronously in a workqueue, which means the caller needs * to handle waiting on the async flush for space to become available. * We don't want to interrupt any push that is in progress, hence we only queue - * work if we set the pushing bit approriately. + * work if we set the pushing bit appropriately. * * We do this unlocked - we only need to know whether there is anything in the * AIL at the time we are called. We don't need to access the contents of @@ -836,7 +836,7 @@ xfs_trans_ail_init( init_waitqueue_head(&ailp->ail_empty); ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s", - ailp->ail_mount->m_fsname); + ailp->ail_mount->m_super->s_id); if (IS_ERR(ailp->ail_task)) goto out_free_ailp; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 16457465833b..a6fe2d8dc40f 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -25,8 +25,8 @@ STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); */ void xfs_trans_dqjoin( - xfs_trans_t *tp, - xfs_dquot_t *dqp) + struct xfs_trans *tp, + struct xfs_dquot *dqp) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(dqp->q_logitem.qli_dquot == dqp); @@ -49,8 +49,8 @@ xfs_trans_dqjoin( */ void xfs_trans_log_dquot( - xfs_trans_t *tp, - xfs_dquot_t *dqp) + struct xfs_trans *tp, + struct xfs_dquot *dqp) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); @@ -486,12 +486,12 @@ xfs_trans_apply_dquot_deltas( */ void xfs_trans_unreserve_and_mod_dquots( - xfs_trans_t *tp) + struct xfs_trans *tp) { int i, j; - xfs_dquot_t *dqp; + struct xfs_dquot *dqp; struct xfs_dqtrx *qtrx, *qa; - bool locked; + bool locked; if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; @@ -571,21 +571,21 @@ xfs_quota_warn( */ STATIC int xfs_trans_dqresv( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dquot_t *dqp, - int64_t nblks, - long ninos, - uint flags) + struct xfs_trans *tp, + struct xfs_mount *mp, + struct xfs_dquot *dqp, + int64_t nblks, + long ninos, + uint flags) { - xfs_qcnt_t hardlimit; - xfs_qcnt_t softlimit; - time_t timer; - xfs_qwarncnt_t warns; - xfs_qwarncnt_t warnlimit; - xfs_qcnt_t total_count; - xfs_qcnt_t *resbcountp; - xfs_quotainfo_t *q = mp->m_quotainfo; + xfs_qcnt_t hardlimit; + xfs_qcnt_t softlimit; + time_t timer; + xfs_qwarncnt_t warns; + xfs_qwarncnt_t warnlimit; + xfs_qcnt_t total_count; + xfs_qcnt_t *resbcountp; + struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_def_quota *defq; @@ -824,13 +824,13 @@ xfs_trans_reserve_quota_nblks( /* * This routine is called to allocate a quotaoff log item. */ -xfs_qoff_logitem_t * +struct xfs_qoff_logitem * xfs_trans_get_qoff_item( - xfs_trans_t *tp, - xfs_qoff_logitem_t *startqoff, + struct xfs_trans *tp, + struct xfs_qoff_logitem *startqoff, uint flags) { - xfs_qoff_logitem_t *q; + struct xfs_qoff_logitem *q; ASSERT(tp != NULL); @@ -852,8 +852,8 @@ xfs_trans_get_qoff_item( */ void xfs_trans_log_quotaoff_item( - xfs_trans_t *tp, - xfs_qoff_logitem_t *qlp) + struct xfs_trans *tp, + struct xfs_qoff_logitem *qlp) { tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags); @@ -872,6 +872,6 @@ xfs_trans_free_dqinfo( { if (!tp->t_dqinfo) return; - kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo); + kmem_cache_free(xfs_qm_dqtrxzone, tp->t_dqinfo); tp->t_dqinfo = NULL; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index cb895b1df5e4..383f0203d103 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -11,6 +11,7 @@ #include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_attr.h" +#include "xfs_acl.h" #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> diff --git a/include/linux/falloc.h b/include/linux/falloc.h index fc61fdb9d1e9..8bf3d79f3e82 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -20,7 +20,10 @@ struct space_resv { }; #define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) +#define FS_IOC_UNRESVSP _IOW('X', 41, struct space_resv) #define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) +#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv) +#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv) #define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ FALLOC_FL_PUNCH_HOLE | \ @@ -42,10 +45,13 @@ struct space_resv_32 { __s32 l_pad[4]; /* reserve area */ }; -#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32) +#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32) +#define FS_IOC_UNRESVSP_32 _IOW ('X', 41, struct space_resv_32) #define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) +#define FS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct space_resv_32) +#define FS_IOC_ZERO_RANGE_32 _IOW ('X', 57, struct space_resv_32) -int compat_ioctl_preallocate(struct file *, struct space_resv_32 __user *); +int compat_ioctl_preallocate(struct file *, int, struct space_resv_32 __user *); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index c159a8bdee8b..98e0349adb52 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,7 +2554,7 @@ extern int finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -extern int ioctl_preallocate(struct file *filp, void __user *argp); +extern int ioctl_preallocate(struct file *filp, int mode, void __user *argp); /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); |