diff options
Diffstat (limited to 'fs/xfs/xfs_iomap.c')
-rw-r--r-- | fs/xfs/xfs_iomap.c | 865 |
1 files changed, 438 insertions, 427 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f780e223b118..28e2d1f37267 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -29,8 +29,8 @@ #include "xfs_reflink.h" -#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ - << mp->m_writeio_log) +#define XFS_ALLOC_ALIGN(mp, off) \ + (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) static int xfs_alert_fsblock_zero( @@ -54,9 +54,10 @@ xfs_bmbt_to_iomap( struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, - bool shared) + u16 flags) { struct xfs_mount *mp = ip->i_mount; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock))) return xfs_alert_fsblock_zero(ip, imap); @@ -77,14 +78,13 @@ xfs_bmbt_to_iomap( } iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); - iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); - iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); + iomap->bdev = target->bt_bdev; + iomap->dax_dev = target->bt_daxdev; + iomap->flags = flags; if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) iomap->flags |= IOMAP_F_DIRTY; - if (shared) - iomap->flags |= IOMAP_F_SHARED; return 0; } @@ -95,18 +95,30 @@ xfs_hole_to_iomap( xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + iomap->addr = IOMAP_NULL_ADDR; iomap->type = IOMAP_HOLE; iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb); iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb); - iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); - iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); + iomap->bdev = target->bt_bdev; + iomap->dax_dev = target->bt_daxdev; +} + +static inline xfs_fileoff_t +xfs_iomap_end_fsb( + struct xfs_mount *mp, + loff_t offset, + loff_t count) +{ + ASSERT(offset <= mp->m_super->s_maxbytes); + return min(XFS_B_TO_FSB(mp, offset + count), + XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); } -xfs_extlen_t +static xfs_extlen_t xfs_eof_alignment( - struct xfs_inode *ip, - xfs_extlen_t extsize) + struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; xfs_extlen_t align = 0; @@ -129,111 +141,80 @@ xfs_eof_alignment( align = 0; } - /* - * Always round up the allocation request to an extent boundary - * (when file on a real-time subvolume or has di_extsize hint). - */ - if (extsize) { - if (align) - align = roundup_64(align, extsize); - else - align = extsize; - } - return align; } -STATIC int +/* + * Check if last_fsb is outside the last extent, and if so grow it to the next + * stripe unit boundary. + */ +xfs_fileoff_t xfs_iomap_eof_align_last_fsb( struct xfs_inode *ip, - xfs_extlen_t extsize, - xfs_fileoff_t *last_fsb) + xfs_fileoff_t end_fsb) { - xfs_extlen_t align = xfs_eof_alignment(ip, extsize); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_extlen_t extsz = xfs_get_extsz_hint(ip); + xfs_extlen_t align = xfs_eof_alignment(ip); + struct xfs_bmbt_irec irec; + struct xfs_iext_cursor icur; + + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + + /* + * Always round up the allocation request to the extent hint boundary. + */ + if (extsz) { + if (align) + align = roundup_64(align, extsz); + else + align = extsz; + } if (align) { - xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align); - int eof, error; + xfs_fileoff_t aligned_end_fsb = roundup_64(end_fsb, align); - error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); - if (error) - return error; - if (eof) - *last_fsb = new_last_fsb; + xfs_iext_last(ifp, &icur); + if (!xfs_iext_get_extent(ifp, &icur, &irec) || + aligned_end_fsb >= irec.br_startoff + irec.br_blockcount) + return aligned_end_fsb; } - return 0; + + return end_fsb; } int xfs_iomap_write_direct( - xfs_inode_t *ip, - xfs_off_t offset, - size_t count, - xfs_bmbt_irec_t *imap, - int nmaps) + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + xfs_fileoff_t count_fsb, + struct xfs_bmbt_irec *imap) { - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t offset_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t count_fsb, resaligned; - xfs_extlen_t extsz; - int nimaps; - int quota_flag; - int rt; - xfs_trans_t *tp; - uint qblocks, resblks, resrtextents; - int error; - int lockmode; - int bmapi_flags = XFS_BMAPI_PREALLOC; - uint tflags = 0; - - rt = XFS_IS_REALTIME_INODE(ip); - extsz = xfs_get_extsz_hint(ip); - lockmode = XFS_ILOCK_SHARED; /* locked by caller */ - - ASSERT(xfs_isilocked(ip, lockmode)); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + xfs_filblks_t resaligned; + int nimaps; + int quota_flag; + uint qblocks, resblks; + unsigned int resrtextents = 0; + int error; + int bmapi_flags = XFS_BMAPI_PREALLOC; + uint tflags = 0; - offset_fsb = XFS_B_TO_FSBT(mp, offset); - last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); - if ((offset + count) > XFS_ISIZE(ip)) { - /* - * Assert that the in-core extent list is present since this can - * call xfs_iread_extents() and we only have the ilock shared. - * This should be safe because the lock was held around a bmapi - * call in the caller and we only need it to access the in-core - * list. - */ - ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & - XFS_IFEXTENTS); - error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb); - if (error) - goto out_unlock; - } else { - if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) - last_fsb = min(last_fsb, (xfs_fileoff_t) - imap->br_blockcount + - imap->br_startoff); - } - count_fsb = last_fsb - offset_fsb; ASSERT(count_fsb > 0); - resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz); - if (unlikely(rt)) { + resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, + xfs_get_extsz_hint(ip)); + if (unlikely(XFS_IS_REALTIME_INODE(ip))) { resrtextents = qblocks = resaligned; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { - resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); quota_flag = XFS_QMOPT_RES_REGBLKS; } - /* - * Drop the shared lock acquired by the caller, attach the dquot if - * necessary and move on to transaction setup. - */ - xfs_iunlock(ip, lockmode); error = xfs_qm_dqattach(ip); if (error) return error; @@ -263,8 +244,7 @@ xfs_iomap_write_direct( if (error) return error; - lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockmode); + xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) @@ -277,8 +257,8 @@ xfs_iomap_write_direct( * caller gave to us. */ nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - bmapi_flags, resblks, imap, &nimaps); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0, + imap, &nimaps); if (error) goto out_res_cancel; @@ -301,7 +281,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: - xfs_iunlock(ip, lockmode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_res_cancel: @@ -410,19 +390,19 @@ xfs_iomap_prealloc_size( if (offset + count <= XFS_ISIZE(ip)) return 0; - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && - (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks))) + if (!(mp->m_flags & XFS_MOUNT_ALLOCSIZE) && + (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))) return 0; /* * If an explicit allocsize is set, the file is small, or we * are writing behind a hole, then use the minimum prealloc: */ - if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || + if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) || XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || !xfs_iext_peek_prev_extent(ifp, icur, &prev) || prev.br_startoff + prev.br_blockcount < offset_fsb) - return mp->m_writeio_blocks; + return mp->m_allocsize_blocks; /* * Determine the initial size of the preallocation. We are beyond the @@ -515,219 +495,13 @@ xfs_iomap_prealloc_size( while (alloc_blocks && alloc_blocks >= freesp) alloc_blocks >>= 4; check_writeio: - if (alloc_blocks < mp->m_writeio_blocks) - alloc_blocks = mp->m_writeio_blocks; + if (alloc_blocks < mp->m_allocsize_blocks) + alloc_blocks = mp->m_allocsize_blocks; trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift, - mp->m_writeio_blocks); + mp->m_allocsize_blocks); return alloc_blocks; } -static int -xfs_file_iomap_begin_delay( - struct inode *inode, - loff_t offset, - loff_t count, - unsigned flags, - struct iomap *iomap) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t maxbytes_fsb = - XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - xfs_fileoff_t end_fsb; - struct xfs_bmbt_irec imap, cmap; - struct xfs_iext_cursor icur, ccur; - xfs_fsblock_t prealloc_blocks = 0; - bool eof = false, cow_eof = false, shared = false; - int whichfork = XFS_DATA_FORK; - int error = 0; - - ASSERT(!XFS_IS_REALTIME_INODE(ip)); - ASSERT(!xfs_get_extsz_hint(ip)); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - error = -EFSCORRUPTED; - goto out_unlock; - } - - XFS_STATS_INC(mp, xs_blk_mapw); - - if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - goto out_unlock; - } - - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - - /* - * Search the data fork fork first to look up our source mapping. We - * always need the data fork map, as we have to return it to the - * iomap code so that the higher level write code can read data in to - * perform read-modify-write cycles for unaligned writes. - */ - eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); - if (eof) - imap.br_startoff = end_fsb; /* fake hole until the end */ - - /* We never need to allocate blocks for zeroing a hole. */ - if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { - xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); - goto out_unlock; - } - - /* - * Search the COW fork extent list even if we did not find a data fork - * extent. This serves two purposes: first this implements the - * speculative preallocation using cowextsize, so that we also unshare - * block adjacent to shared blocks instead of just the shared blocks - * themselves. Second the lookup in the extent list is generally faster - * than going out to the shared extent tree. - */ - if (xfs_is_cow_inode(ip)) { - if (!ip->i_cowfp) { - ASSERT(!xfs_is_reflink_inode(ip)); - xfs_ifork_init_cow(ip); - } - cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, - &ccur, &cmap); - if (!cow_eof && cmap.br_startoff <= offset_fsb) { - trace_xfs_reflink_cow_found(ip, &cmap); - whichfork = XFS_COW_FORK; - goto done; - } - } - - if (imap.br_startoff <= offset_fsb) { - /* - * For reflink files we may need a delalloc reservation when - * overwriting shared extents. This includes zeroing of - * existing extents that contain data. - */ - if (!xfs_is_cow_inode(ip) || - ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, - &imap); - goto done; - } - - xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); - - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_inode_need_cow(ip, &imap, &shared); - if (error) - goto out_unlock; - - /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) { - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, - &imap); - goto done; - } - - /* - * Fork all the shared blocks from our write offset until the - * end of the extent. - */ - whichfork = XFS_COW_FORK; - end_fsb = imap.br_startoff + imap.br_blockcount; - } else { - /* - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES - * pages to keep the chunks of work done where somewhat - * symmetric with the work writeback does. This is a completely - * arbitrary number pulled out of thin air. - * - * Note that the values needs to be less than 32-bits wide until - * the lower level functions are updated. - */ - count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - - if (xfs_is_always_cow_inode(ip)) - whichfork = XFS_COW_FORK; - } - - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out_unlock; - - if (eof) { - prealloc_blocks = xfs_iomap_prealloc_size(ip, whichfork, offset, - count, &icur); - if (prealloc_blocks) { - xfs_extlen_t align; - xfs_off_t end_offset; - xfs_fileoff_t p_end_fsb; - - end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); - p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + - prealloc_blocks; - - align = xfs_eof_alignment(ip, 0); - if (align) - p_end_fsb = roundup_64(p_end_fsb, align); - - p_end_fsb = min(p_end_fsb, maxbytes_fsb); - ASSERT(p_end_fsb > offset_fsb); - prealloc_blocks = p_end_fsb - end_fsb; - } - } - -retry: - error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, - end_fsb - offset_fsb, prealloc_blocks, - whichfork == XFS_DATA_FORK ? &imap : &cmap, - whichfork == XFS_DATA_FORK ? &icur : &ccur, - whichfork == XFS_DATA_FORK ? eof : cow_eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ - trace_xfs_delalloc_enospc(ip, offset, count); - if (prealloc_blocks) { - prealloc_blocks = 0; - goto retry; - } - /*FALLTHRU*/ - default: - goto out_unlock; - } - - /* - * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch - * them out if the write happens to fail. - */ - iomap->flags |= IOMAP_F_NEW; - trace_xfs_iomap_alloc(ip, offset, count, whichfork, - whichfork == XFS_DATA_FORK ? &imap : &cmap); -done: - if (whichfork == XFS_COW_FORK) { - if (imap.br_startoff > offset_fsb) { - xfs_trim_extent(&cmap, offset_fsb, - imap.br_startoff - offset_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true); - goto out_unlock; - } - /* ensure we only report blocks we have a reservation for */ - xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); - shared = true; - } - error = xfs_bmbt_to_iomap(ip, iomap, &imap, shared); -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; -} - int xfs_iomap_write_unwritten( xfs_inode_t *ip, @@ -765,6 +539,11 @@ xfs_iomap_write_unwritten( */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; + /* Attach dquots so that bmbt splits are accounted correctly. */ + error = xfs_qm_dqattach(ip); + if (error) + return error; + do { /* * Set up a transaction to convert the range of extents @@ -783,6 +562,11 @@ xfs_iomap_write_unwritten( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); + error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto error_on_bmapi_transaction; + /* * Modify the unwritten extent state of the buffer. */ @@ -840,23 +624,42 @@ error_on_bmapi_transaction: static inline bool imap_needs_alloc( struct inode *inode, + unsigned flags, struct xfs_bmbt_irec *imap, int nimaps) { - return !nimaps || - imap->br_startblock == HOLESTARTBLOCK || - imap->br_startblock == DELAYSTARTBLOCK || - (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN); + /* don't allocate blocks when just zeroing */ + if (flags & IOMAP_ZERO) + return false; + if (!nimaps || + imap->br_startblock == HOLESTARTBLOCK || + imap->br_startblock == DELAYSTARTBLOCK) + return true; + /* we convert unwritten extents before copying the data for DAX */ + if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN) + return true; + return false; } static inline bool -needs_cow_for_zeroing( +imap_needs_cow( + struct xfs_inode *ip, + unsigned int flags, struct xfs_bmbt_irec *imap, int nimaps) { - return nimaps && - imap->br_startblock != HOLESTARTBLOCK && - imap->br_state != XFS_EXT_UNWRITTEN; + if (!xfs_is_cow_inode(ip)) + return false; + + /* when zeroing we don't have to COW holes or unwritten extents */ + if (flags & IOMAP_ZERO) { + if (!nimaps || + imap->br_startblock == HOLESTARTBLOCK || + imap->br_state == XFS_EXT_UNWRITTEN) + return false; + } + + return true; } static int @@ -872,15 +675,8 @@ xfs_ilock_for_iomap( * COW writes may allocate delalloc space or convert unwritten COW * extents, so we need to make sure to take the lock exclusively here. */ - if (xfs_is_cow_inode(ip) && is_write) { - /* - * FIXME: It could still overwrite on unshared extents and not - * need allocation. - */ - if (flags & IOMAP_NOWAIT) - return -EAGAIN; + if (xfs_is_cow_inode(ip) && is_write) mode = XFS_ILOCK_EXCL; - } /* * Extents not yet cached requires exclusive access, don't block. This @@ -917,111 +713,73 @@ relock: } static int -xfs_file_iomap_begin( +xfs_direct_write_iomap_begin( struct inode *inode, loff_t offset, loff_t length, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct iomap *srcmap) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - struct xfs_bmbt_irec imap; - xfs_fileoff_t offset_fsb, end_fsb; + struct xfs_bmbt_irec imap, cmap; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); int nimaps = 1, error = 0; bool shared = false; + u16 iomap_flags = 0; unsigned lockmode; + ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); + if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && !(flags & IOMAP_DIRECT) && - !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { - /* Reserve delalloc blocks for regular writeback. */ - return xfs_file_iomap_begin_delay(inode, offset, length, flags, - iomap); - } - /* - * Lock the inode in the manner required for the specified operation and - * check for as many conditions that would result in blocking as - * possible. This removes most of the non-blocking checks from the - * mapping code below. + * Writes that span EOF might trigger an IO size update on completion, + * so consider them to be dirty for the purposes of O_DSYNC even if + * there is no other metadata changes pending or have been made here. */ + if (offset + length > i_size_read(inode)) + iomap_flags |= IOMAP_F_DIRTY; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; - ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset > mp->m_super->s_maxbytes - length) - length = mp->m_super->s_maxbytes - offset; - offset_fsb = XFS_B_TO_FSBT(mp, offset); - end_fsb = XFS_B_TO_FSB(mp, offset + length); - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); if (error) goto out_unlock; - if (flags & IOMAP_REPORT) { - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared); - if (error) + if (imap_needs_cow(ip, flags, &imap, nimaps)) { + error = -EAGAIN; + if (flags & IOMAP_NOWAIT) goto out_unlock; - } - - /* Non-modifying mapping requested, so we are done */ - if (!(flags & (IOMAP_WRITE | IOMAP_ZERO))) - goto out_found; - - /* - * Break shared extents if necessary. Checks for non-blocking IO have - * been done up front, so we don't need to do them here. - */ - if (xfs_is_cow_inode(ip)) { - struct xfs_bmbt_irec cmap; - bool directio = (flags & IOMAP_DIRECT); - - /* if zeroing doesn't need COW allocation, then we are done. */ - if ((flags & IOMAP_ZERO) && - !needs_cow_for_zeroing(&imap, nimaps)) - goto out_found; /* may drop and re-acquire the ilock */ - cmap = imap; - error = xfs_reflink_allocate_cow(ip, &cmap, &shared, &lockmode, - directio); + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, + &lockmode, flags & IOMAP_DIRECT); if (error) goto out_unlock; - - /* - * For buffered writes we need to report the address of the - * previous block (if there was any) so that the higher level - * write code can perform read-modify-write operations; we - * won't need the CoW fork mapping until writeback. For direct - * I/O, which must be block aligned, we need to report the - * newly allocated address. If the data fork has a hole, copy - * the COW fork mapping to avoid allocating to the data fork. - */ - if (directio || imap.br_startblock == HOLESTARTBLOCK) - imap = cmap; - + if (shared) + goto out_found_cow; end_fsb = imap.br_startoff + imap.br_blockcount; length = XFS_FSB_TO_B(mp, end_fsb) - offset; } - /* Don't need to allocate over holes when doing zeroing operations. */ - if (flags & IOMAP_ZERO) - goto out_found; + if (imap_needs_alloc(inode, flags, &imap, nimaps)) + goto allocate_blocks; - if (!imap_needs_alloc(inode, &imap, nimaps)) - goto out_found; + xfs_iunlock(ip, lockmode); + trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags); - /* If nowait is set bail since we are going to make allocations. */ - if (flags & IOMAP_NOWAIT) { - error = -EAGAIN; +allocate_blocks: + error = -EAGAIN; + if (flags & IOMAP_NOWAIT) goto out_unlock; - } /* * We cap the maximum length we map to a sane size to keep the chunks @@ -1033,48 +791,273 @@ xfs_file_iomap_begin( * lower level functions are updated. */ length = min_t(loff_t, length, 1024 * PAGE_SIZE); + end_fsb = xfs_iomap_end_fsb(mp, offset, length); - /* - * xfs_iomap_write_direct() expects the shared lock. It is unlocked on - * return. - */ - if (lockmode == XFS_ILOCK_EXCL) - xfs_ilock_demote(ip, lockmode); - error = xfs_iomap_write_direct(ip, offset, length, &imap, - nimaps); + if (offset + length > XFS_ISIZE(ip)) + end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb); + else if (nimaps && imap.br_startblock == HOLESTARTBLOCK) + end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount); + xfs_iunlock(ip, lockmode); + + error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, + &imap); if (error) return error; - iomap->flags |= IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW); -out_finish: - return xfs_bmbt_to_iomap(ip, iomap, &imap, shared); - -out_found: - ASSERT(nimaps); +out_found_cow: xfs_iunlock(ip, lockmode); - trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - goto out_finish; + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); + if (imap.br_startblock != HOLESTARTBLOCK) { + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0); + if (error) + return error; + } + return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); out_unlock: xfs_iunlock(ip, lockmode); return error; } +const struct iomap_ops xfs_direct_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, +}; + static int -xfs_file_iomap_end_delalloc( - struct xfs_inode *ip, +xfs_buffered_write_iomap_begin( + struct inode *inode, + loff_t offset, + loff_t count, + unsigned flags, + struct iomap *iomap, + struct iomap *srcmap) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count); + struct xfs_bmbt_irec imap, cmap; + struct xfs_iext_cursor icur, ccur; + xfs_fsblock_t prealloc_blocks = 0; + bool eof = false, cow_eof = false, shared = false; + int allocfork = XFS_DATA_FORK; + int error = 0; + + /* we can't use delayed allocations when using extent size hints */ + if (xfs_get_extsz_hint(ip)) + return xfs_direct_write_iomap_begin(inode, offset, count, + flags, iomap, srcmap); + + ASSERT(!XFS_IS_REALTIME_INODE(ip)); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, XFS_DATA_FORK)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + error = -EFSCORRUPTED; + goto out_unlock; + } + + XFS_STATS_INC(mp, xs_blk_mapw); + + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + goto out_unlock; + } + + /* + * Search the data fork fork first to look up our source mapping. We + * always need the data fork map, as we have to return it to the + * iomap code so that the higher level write code can read data in to + * perform read-modify-write cycles for unaligned writes. + */ + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); + if (eof) + imap.br_startoff = end_fsb; /* fake hole until the end */ + + /* We never need to allocate blocks for zeroing a hole. */ + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); + goto out_unlock; + } + + /* + * Search the COW fork extent list even if we did not find a data fork + * extent. This serves two purposes: first this implements the + * speculative preallocation using cowextsize, so that we also unshare + * block adjacent to shared blocks instead of just the shared blocks + * themselves. Second the lookup in the extent list is generally faster + * than going out to the shared extent tree. + */ + if (xfs_is_cow_inode(ip)) { + if (!ip->i_cowfp) { + ASSERT(!xfs_is_reflink_inode(ip)); + xfs_ifork_init_cow(ip); + } + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, + &ccur, &cmap); + if (!cow_eof && cmap.br_startoff <= offset_fsb) { + trace_xfs_reflink_cow_found(ip, &cmap); + goto found_cow; + } + } + + if (imap.br_startoff <= offset_fsb) { + /* + * For reflink files we may need a delalloc reservation when + * overwriting shared extents. This includes zeroing of + * existing extents that contain data. + */ + if (!xfs_is_cow_inode(ip) || + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, + &imap); + goto found_imap; + } + + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); + + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_inode_need_cow(ip, &imap, &shared); + if (error) + goto out_unlock; + + /* Not shared? Just report the (potentially capped) extent. */ + if (!shared) { + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, + &imap); + goto found_imap; + } + + /* + * Fork all the shared blocks from our write offset until the + * end of the extent. + */ + allocfork = XFS_COW_FORK; + end_fsb = imap.br_startoff + imap.br_blockcount; + } else { + /* + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES + * pages to keep the chunks of work done where somewhat + * symmetric with the work writeback does. This is a completely + * arbitrary number pulled out of thin air. + * + * Note that the values needs to be less than 32-bits wide until + * the lower level functions are updated. + */ + count = min_t(loff_t, count, 1024 * PAGE_SIZE); + end_fsb = xfs_iomap_end_fsb(mp, offset, count); + + if (xfs_is_always_cow_inode(ip)) + allocfork = XFS_COW_FORK; + } + + error = xfs_qm_dqattach_locked(ip, false); + if (error) + goto out_unlock; + + if (eof) { + prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset, + count, &icur); + if (prealloc_blocks) { + xfs_extlen_t align; + xfs_off_t end_offset; + xfs_fileoff_t p_end_fsb; + + end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1); + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; + + align = xfs_eof_alignment(ip); + if (align) + p_end_fsb = roundup_64(p_end_fsb, align); + + p_end_fsb = min(p_end_fsb, + XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); + ASSERT(p_end_fsb > offset_fsb); + prealloc_blocks = p_end_fsb - end_fsb; + } + } + +retry: + error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, + allocfork == XFS_DATA_FORK ? &imap : &cmap, + allocfork == XFS_DATA_FORK ? &icur : &ccur, + allocfork == XFS_DATA_FORK ? eof : cow_eof); + switch (error) { + case 0: + break; + case -ENOSPC: + case -EDQUOT: + /* retry without any preallocation */ + trace_xfs_delalloc_enospc(ip, offset, count); + if (prealloc_blocks) { + prealloc_blocks = 0; + goto retry; + } + /*FALLTHRU*/ + default: + goto out_unlock; + } + + if (allocfork == XFS_COW_FORK) { + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap); + goto found_cow; + } + + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW); + +found_imap: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &imap, 0); + +found_cow: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (imap.br_startoff <= offset_fsb) { + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0); + if (error) + return error; + } else { + xfs_trim_extent(&cmap, offset_fsb, + imap.br_startoff - offset_fsb); + } + return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +static int +xfs_buffered_write_iomap_end( + struct inode *inode, loff_t offset, loff_t length, ssize_t written, + unsigned flags, struct iomap *iomap) { + struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; + if (iomap->type != IOMAP_DELALLOC) + return 0; + /* * Behave as if the write failed if drop writes is enabled. Set the NEW * flag to force delalloc cleanup. @@ -1119,24 +1102,51 @@ xfs_file_iomap_end_delalloc( return 0; } +const struct iomap_ops xfs_buffered_write_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, + .iomap_end = xfs_buffered_write_iomap_end, +}; + static int -xfs_file_iomap_end( +xfs_read_iomap_begin( struct inode *inode, loff_t offset, loff_t length, - ssize_t written, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct iomap *srcmap) { - if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) - return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written, iomap); - return 0; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec imap; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); + int nimaps = 1, error = 0; + bool shared = false; + unsigned lockmode; + + ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + error = xfs_ilock_for_iomap(ip, flags, &lockmode); + if (error) + return error; + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, 0); + if (!error && (flags & IOMAP_REPORT)) + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + xfs_iunlock(ip, lockmode); + + if (error) + return error; + trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); + return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0); } -const struct iomap_ops xfs_iomap_ops = { - .iomap_begin = xfs_file_iomap_begin, - .iomap_end = xfs_file_iomap_end, +const struct iomap_ops xfs_read_iomap_ops = { + .iomap_begin = xfs_read_iomap_begin, }; static int @@ -1145,7 +1155,8 @@ xfs_seek_iomap_begin( loff_t offset, loff_t length, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct iomap *srcmap) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1178,8 +1189,7 @@ xfs_seek_iomap_begin( /* * Fake a hole until the end of the file. */ - data_fsb = min(XFS_B_TO_FSB(mp, offset + length), - XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); + data_fsb = xfs_iomap_end_fsb(mp, offset, length); } /* @@ -1193,7 +1203,7 @@ xfs_seek_iomap_begin( if (data_fsb < cow_fsb + cmap.br_blockcount) end_fsb = min(end_fsb, data_fsb); xfs_trim_extent(&cmap, offset_fsb, end_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true); + error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); /* * This is a COW extent, so we must probe the page cache * because there could be dirty page cache being backed @@ -1215,7 +1225,7 @@ xfs_seek_iomap_begin( imap.br_state = XFS_EXT_NORM; done: xfs_trim_extent(&imap, offset_fsb, end_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0); out_unlock: xfs_iunlock(ip, lockmode); return error; @@ -1231,7 +1241,8 @@ xfs_xattr_iomap_begin( loff_t offset, loff_t length, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct iomap *srcmap) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1261,7 +1272,7 @@ out_unlock: if (error) return error; ASSERT(nimaps); - return xfs_bmbt_to_iomap(ip, iomap, &imap, false); + return xfs_bmbt_to_iomap(ip, iomap, &imap, 0); } const struct iomap_ops xfs_xattr_iomap_ops = { |