XFS bug fixes for 6.12-rc4

 * Fix integer overflow in xrep_bmap
 * Fix stale delalloc punching for COW IO

Signed-off-by: Carlos Maiolino <cem@kernel.org>
-----BEGIN PGP SIGNATURE-----

iJUEABMJAB0WIQQMHYkcUKcy4GgPe2RGdaER5QtfpgUCZw5LIwAKCRBGdaER5Qtf
puRlAYDezbvs1dDSkKIGOt3inGdLptNAu4qniXBUkbYI9BzmtIVDueWP4Wo0dV3d
gu3xrWQBfjFXdmEuBlwLuAFrp07AN18BVMj+DWCiEShsPHSoSPcF/IrDiz4BHvGv
MKYq9CywFw==
=Gj9b
-----END PGP SIGNATURE-----

Merge tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

 - Fix integer overflow in xrep_bmap

 - Fix stale delalloc punching for COW IO

* tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: punch delalloc extents from the COW fork for COW writes
  xfs: set IOMAP_F_SHARED for all COW fork allocations
  xfs: share more code in xfs_buffered_write_iomap_begin
  xfs: support the COW fork in xfs_bmap_punch_delalloc_range
  xfs: IOMAP_ZERO and IOMAP_UNSHARE already hold invalidate_lock
  xfs: take XFS_MMAPLOCK_EXCL in xfs_file_write_zero_eof
  xfs: factor out a xfs_file_write_zero_eof helper
  iomap: move locking out of iomap_write_delalloc_release
  iomap: remove iomap_file_buffered_write_punch_delalloc
  iomap: factor out a iomap_last_written_block helper
  xfs: fix integer overflow in xrep_bmap
This commit is contained in commit 568570fdf2.
@@ -208,7 +208,7 @@ The filesystem must arrange to `cancel
 such `reservations
 <https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/>`_
 because writeback will not consume the reservation.
-The ``iomap_file_buffered_write_punch_delalloc`` can be called from a
+The ``iomap_write_delalloc_release`` can be called from a
 ``->iomap_end`` function to find all the clean areas of the folios
 caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
 It takes the ``invalidate_lock``.
@@ -1145,10 +1145,36 @@ static void iomap_write_delalloc_scan(struct inode *inode,
 }
 
 /*
+ * When a short write occurs, the filesystem might need to use ->iomap_end
+ * to remove space reservations created in ->iomap_begin.
+ *
+ * For filesystems that use delayed allocation, there can be dirty pages over
+ * the delalloc extent outside the range of a short write but still within the
+ * delalloc extent allocated for this iomap if the write raced with page
+ * faults.
+ *
  * Punch out all the delalloc blocks in the range given except for those that
  * have dirty data still pending in the page cache - those are going to be
  * written and so must still retain the delalloc backing for writeback.
  *
+ * The punch() callback *must* only punch delalloc extents in the range passed
+ * to it. It must skip over all other types of extents in the range and leave
+ * them completely unchanged. It must do this punch atomically with respect to
+ * other extent modifications.
+ *
+ * The punch() callback may be called with a folio locked to prevent writeback
+ * extent allocation racing at the edge of the range we are currently punching.
+ * The locked folio may or may not cover the range being punched, so it is not
+ * safe for the punch() callback to lock folios itself.
+ *
+ * Lock order is:
+ *
+ * inode->i_rwsem (shared or exclusive)
+ *   inode->i_mapping->invalidate_lock (exclusive)
+ *     folio_lock()
+ *       ->punch
+ *         internal filesystem allocation lock
+ *
  * As we are scanning the page cache for data, we don't need to reimplement the
  * wheel - mapping_seek_hole_data() does exactly what we need to identify the
  * start and end of data ranges correctly even for sub-folio block sizes. This
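The scan that this new comment documents walks the page cache with mapping_seek_hole_data() and only punches the ranges that hold no dirty data. As a rough illustration, here is a userspace sketch of the same walk built on plain lseek(2) with SEEK_DATA/SEEK_HOLE; it is an analogy only, not the kernel code, and the test file name is an assumption:

	/*
	 * Userspace sketch of the scan pattern described above: walk
	 * [start, end) with SEEK_DATA/SEEK_HOLE (the userspace analogue of
	 * mapping_seek_hole_data()) and report the gaps that a punch()
	 * callback would be handed.
	 */
	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	static void scan_gaps(int fd, off_t start, off_t end)
	{
		off_t punch_start = start;

		while (start < end) {
			off_t data = lseek(fd, start, SEEK_DATA);

			if (data < 0 || data >= end)
				break;		/* no more data in range */
			if (punch_start < data)
				printf("would punch [%lld, %lld)\n",
				       (long long)punch_start, (long long)data);
			/* skip over the data run; keep its backing */
			start = lseek(fd, data, SEEK_HOLE);
			punch_start = start;
		}
		if (punch_start < end)
			printf("would punch [%lld, %lld)\n",
			       (long long)punch_start, (long long)end);
	}

	int main(int argc, char **argv)
	{
		int fd = open(argc > 1 ? argv[1] : "testfile", O_RDONLY);

		if (fd >= 0) {
			scan_gaps(fd, 0, lseek(fd, 0, SEEK_END));
			close(fd);
		}
		return 0;
	}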
@@ -1177,7 +1203,7 @@ static void iomap_write_delalloc_scan(struct inode *inode,
  * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
  * the code to subtle off-by-one bugs....
  */
-static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
+void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
 		loff_t end_byte, unsigned flags, struct iomap *iomap,
 		iomap_punch_t punch)
 {
@@ -1185,12 +1211,13 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
 	loff_t scan_end_byte = min(i_size_read(inode), end_byte);
 
 	/*
-	 * Lock the mapping to avoid races with page faults re-instantiating
-	 * folios and dirtying them via ->page_mkwrite whilst we walk the
-	 * cache and perform delalloc extent removal. Failing to do this can
-	 * leave dirty pages with no space reservation in the cache.
+	 * The caller must hold invalidate_lock to avoid races with page faults
+	 * re-instantiating folios and dirtying them via ->page_mkwrite whilst
+	 * we walk the cache and perform delalloc extent removal. Failing to do
+	 * this can leave dirty pages with no space reservation in the cache.
 	 */
-	filemap_invalidate_lock(inode->i_mapping);
+	lockdep_assert_held_write(&inode->i_mapping->invalidate_lock);
+
 	while (start_byte < scan_end_byte) {
 		loff_t data_end;
 
@@ -1207,7 +1234,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
 		if (start_byte == -ENXIO || start_byte == scan_end_byte)
 			break;
 		if (WARN_ON_ONCE(start_byte < 0))
-			goto out_unlock;
+			return;
 		WARN_ON_ONCE(start_byte < punch_start_byte);
 		WARN_ON_ONCE(start_byte > scan_end_byte);
 
@@ -1218,7 +1245,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
 		data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
 				scan_end_byte, SEEK_HOLE);
 		if (WARN_ON_ONCE(data_end < 0))
-			goto out_unlock;
+			return;
 
 		/*
 		 * If we race with post-direct I/O invalidation of the page cache,
@@ -1240,74 +1267,8 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
 	if (punch_start_byte < end_byte)
 		punch(inode, punch_start_byte, end_byte - punch_start_byte,
 				iomap);
-out_unlock:
-	filemap_invalidate_unlock(inode->i_mapping);
 }
-
-/*
- * When a short write occurs, the filesystem may need to remove reserved space
- * that was allocated in ->iomap_begin from it's ->iomap_end method. For
- * filesystems that use delayed allocation, we need to punch out delalloc
- * extents from the range that are not dirty in the page cache. As the write can
- * race with page faults, there can be dirty pages over the delalloc extent
- * outside the range of a short write but still within the delalloc extent
- * allocated for this iomap.
- *
- * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
- * simplify range iterations.
- *
- * The punch() callback *must* only punch delalloc extents in the range passed
- * to it. It must skip over all other types of extents in the range and leave
- * them completely unchanged. It must do this punch atomically with respect to
- * other extent modifications.
- *
- * The punch() callback may be called with a folio locked to prevent writeback
- * extent allocation racing at the edge of the range we are currently punching.
- * The locked folio may or may not cover the range being punched, so it is not
- * safe for the punch() callback to lock folios itself.
- *
- * Lock order is:
- *
- * inode->i_rwsem (shared or exclusive)
- *   inode->i_mapping->invalidate_lock (exclusive)
- *     folio_lock()
- *       ->punch
- *         internal filesystem allocation lock
- */
-void iomap_file_buffered_write_punch_delalloc(struct inode *inode,
-		loff_t pos, loff_t length, ssize_t written, unsigned flags,
-		struct iomap *iomap, iomap_punch_t punch)
-{
-	loff_t start_byte;
-	loff_t end_byte;
-	unsigned int blocksize = i_blocksize(inode);
-
-	if (iomap->type != IOMAP_DELALLOC)
-		return;
-
-	/* If we didn't reserve the blocks, we're not allowed to punch them. */
-	if (!(iomap->flags & IOMAP_F_NEW))
-		return;
-
-	/*
-	 * start_byte refers to the first unused block after a short write. If
-	 * nothing was written, round offset down to point at the first block in
-	 * the range.
-	 */
-	if (unlikely(!written))
-		start_byte = round_down(pos, blocksize);
-	else
-		start_byte = round_up(pos + written, blocksize);
-	end_byte = round_up(pos + length, blocksize);
-
-	/* Nothing to do if we've written the entire delalloc extent */
-	if (start_byte >= end_byte)
-		return;
-
-	iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
-			punch);
-}
-EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
 
 static loff_t iomap_unshare_iter(struct iomap_iter *iter)
 {
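The deleted comment notes that these helpers work on [start_byte, end_byte) half-open intervals, and the surviving code keeps that convention. A small standalone demo of why half-open ranges avoid the "+ 1"/"- 1" arithmetic the nearby comment warns about; the block size here is an assumption for the demo:

	/*
	 * With [start, end) intervals, adjacent ranges tile exactly and
	 * loops need no off-by-one corrections. Standalone, not kernel code.
	 */
	#include <stdio.h>

	#define BLOCKSIZE 4096u

	int main(void)
	{
		unsigned long long start = 0, end = 3 * BLOCKSIZE;

		/* Iterate whole blocks; each sub-range is itself half-open. */
		for (unsigned long long pos = start; pos < end; pos += BLOCKSIZE)
			printf("block range [%llu, %llu)\n", pos, pos + BLOCKSIZE);

		/* Length falls out directly, with no "+ 1" correction. */
		printf("total length = %llu\n", end - start);
		return 0;
	}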
@@ -801,7 +801,7 @@ xrep_bmap(
 {
	struct xrep_bmap	*rb;
	char			*descr;
-	unsigned int		max_bmbt_recs;
+	xfs_extnum_t		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;
 
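The xrep_bmap fix above widens max_bmbt_recs from unsigned int to xfs_extnum_t, the 64-bit extent-count type, so a very large extent count can no longer truncate during the repair size calculation. A standalone sketch of the failure mode, with a hypothetical count value:

	/*
	 * Storing a 64-bit extent count in a 32-bit unsigned int silently
	 * truncates once the count exceeds UINT32_MAX. Demo only; this is
	 * not the kernel code path itself.
	 */
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t xfs_extnum_t;	/* matches the kernel's 64-bit count */

	int main(void)
	{
		xfs_extnum_t nextents = 5000000000ULL;	/* hypothetical, > UINT32_MAX */
		unsigned int truncated = nextents;	/* old 'unsigned int' field */
		xfs_extnum_t correct = nextents;	/* new 'xfs_extnum_t' field */

		printf("unsigned int: %u records\n", truncated);	/* wraps */
		printf("xfs_extnum_t: %llu records\n",
		       (unsigned long long)correct);
		return 0;
	}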
@@ -116,7 +116,7 @@ xfs_end_ioend(
	if (unlikely(error)) {
		if (ioend->io_flags & IOMAP_F_SHARED) {
			xfs_reflink_cancel_cow_range(ip, offset, size, true);
-			xfs_bmap_punch_delalloc_range(ip, offset,
+			xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
					offset + size);
		}
		goto done;
@@ -456,7 +456,7 @@ xfs_discard_folio(
	 * byte of the next folio. Hence the end offset is only dependent on the
	 * folio itself and not the start offset that is passed in.
	 */
-	xfs_bmap_punch_delalloc_range(ip, pos,
+	xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
			folio_pos(folio) + folio_size(folio));
 }
 
@@ -442,11 +442,12 @@ out_unlock_iolock:
 void
 xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
+	int			whichfork,
	xfs_off_t		start_byte,
	xfs_off_t		end_byte)
 {
	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = &ip->i_df;
+	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
	struct xfs_bmbt_irec	got, del;
@@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
			continue;
		}
 
-		xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
+		xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}
 
+	if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+		xfs_inode_clear_cowblocks_tag(ip);
+
 out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 }
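With the new whichfork argument, a caller can drain the COW fork, and once that fork holds no delalloc bytes the inode's cowblocks tag is cleared so the background blockgc workers skip it. A simplified standalone model of that bookkeeping; the types and names are stand-ins, not the real XFS structures:

	#include <stdbool.h>
	#include <stdio.h>

	enum fork { DATA_FORK, COW_FORK };

	struct demo_inode {
		long fork_bytes[2];	/* delalloc bytes held per fork */
		bool cowblocks_tagged;	/* analogue of the cowblocks tag */
	};

	static void punch_range(struct demo_inode *ip, enum fork whichfork,
				long bytes)
	{
		ip->fork_bytes[whichfork] -= bytes;
		/* mirrors the new xfs_inode_clear_cowblocks_tag() call */
		if (whichfork == COW_FORK && ip->fork_bytes[COW_FORK] == 0)
			ip->cowblocks_tagged = false;
	}

	int main(void)
	{
		struct demo_inode ip = { { 4096, 8192 }, true };

		punch_range(&ip, COW_FORK, 8192);
		printf("cowblocks tagged: %d\n", (int)ip.cowblocks_tagged);
		return 0;
	}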
@@ -580,7 +584,7 @@ xfs_free_eofblocks(
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
		if (ip->i_delayed_blks) {
-			xfs_bmap_punch_delalloc_range(ip,
+			xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
				round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
				LLONG_MAX);
		}
@@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
 }
 #endif /* CONFIG_XFS_RT */
 
-void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
		xfs_off_t start_byte, xfs_off_t end_byte);
 
 struct kgetbmap {
@@ -347,10 +347,83 @@ xfs_file_splice_read(
	return ret;
 }
 
+/*
+ * Take care of zeroing post-EOF blocks when they might exist.
+ *
+ * Returns 0 if successfully, a negative error for a failure, or 1 if this
+ * function dropped the iolock and reacquired it exclusively and the caller
+ * needs to restart the write sanity checks.
+ */
+static ssize_t
+xfs_file_write_zero_eof(
+	struct kiocb		*iocb,
+	struct iov_iter		*from,
+	unsigned int		*iolock,
+	size_t			count,
+	bool			*drained_dio)
+{
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
+	loff_t			isize;
+	int			error;
+
+	/*
+	 * We need to serialise against EOF updates that occur in IO completions
+	 * here. We want to make sure that nobody is changing the size while
+	 * we do this check until we have placed an IO barrier (i.e. hold
+	 * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
+	 * spinlock effectively forms a memory barrier once we have
+	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
+	 * hence be able to correctly determine if we need to run zeroing.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	isize = i_size_read(VFS_I(ip));
+	if (iocb->ki_pos <= isize) {
+		spin_unlock(&ip->i_flags_lock);
+		return 0;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EAGAIN;
+
+	if (!*drained_dio) {
+		/*
+		 * If zeroing is needed and we are currently holding the iolock
+		 * shared, we need to update it to exclusive which implies
+		 * having to redo all checks before.
+		 */
+		if (*iolock == XFS_IOLOCK_SHARED) {
+			xfs_iunlock(ip, *iolock);
+			*iolock = XFS_IOLOCK_EXCL;
+			xfs_ilock(ip, *iolock);
+			iov_iter_reexpand(from, count);
+		}
+
+		/*
+		 * We now have an IO submission barrier in place, but AIO can do
+		 * EOF updates during IO completion and hence we now need to
+		 * wait for all of them to drain. Non-AIO DIO will have drained
+		 * before we are given the XFS_IOLOCK_EXCL, and so for most
+		 * cases this wait is a no-op.
+		 */
+		inode_dio_wait(VFS_I(ip));
+		*drained_dio = true;
+		return 1;
+	}
+
+	trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+
+	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+	error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
+	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+
+	return error;
+}
+
 /*
  * Common pre-write limit and setup checks.
  *
- * Called with the iolocked held either shared and exclusive according to
+ * Called with the iolock held either shared and exclusive according to
  * @iolock, and returns with it held. Might upgrade the iolock to exclusive
  * if called for a direct write beyond i_size.
  */
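The new helper's three-way return deserves emphasis: 0 means no zeroing was needed or it succeeded, a negative value is an error, and 1 means the iolock was cycled to exclusive and every check must be redone. A minimal standalone sketch of a caller honoring that contract; check_and_zero() is a hypothetical stand-in for the real helper:

	#include <stdio.h>

	static int check_and_zero(int *attempt)
	{
		if ((*attempt)++ == 0)
			return 1;	/* lock was dropped/upgraded: restart */
		return 0;		/* checks passed with the lock held */
	}

	int main(void)
	{
		int attempt = 0;
		int ret;

	restart:
		ret = check_and_zero(&attempt);
		if (ret == 1)
			goto restart;	/* mirrors the goto in xfs_file_write_checks() */
		if (ret < 0)
			return 1;	/* a real caller would return the errno */
		printf("checks done after %d attempt(s)\n", attempt);
		return 0;
	}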
@@ -360,13 +433,10 @@ xfs_file_write_checks(
	struct iov_iter		*from,
	unsigned int		*iolock)
 {
-	struct file		*file = iocb->ki_filp;
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			error = 0;
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;
-	loff_t			isize;
+	ssize_t			error;
 
 restart:
	error = generic_write_checks(iocb, from);
@@ -389,7 +459,7 @@ restart:
	 * exclusively.
	 */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
-		xfs_iunlock(ip, *iolock);
+		xfs_iunlock(XFS_I(inode), *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		error = xfs_ilock_iocb(iocb, *iolock);
		if (error) {
@@ -400,64 +470,24 @@ restart:
	}
 
	/*
-	 * If the offset is beyond the size of the file, we need to zero any
+	 * If the offset is beyond the size of the file, we need to zero all
	 * blocks that fall between the existing EOF and the start of this
-	 * write. If zeroing is needed and we are currently holding the iolock
-	 * shared, we need to update it to exclusive which implies having to
-	 * redo all checks before.
+	 * write.
	 *
-	 * We need to serialise against EOF updates that occur in IO completions
-	 * here. We want to make sure that nobody is changing the size while we
-	 * do this check until we have placed an IO barrier (i.e. hold the
-	 * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
-	 * spinlock effectively forms a memory barrier once we have the
-	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
-	 * hence be able to correctly determine if we need to run zeroing.
-	 *
-	 * We can do an unlocked check here safely as IO completion can only
-	 * extend EOF. Truncate is locked out at this point, so the EOF can
-	 * not move backwards, only forwards. Hence we only need to take the
-	 * slow path and spin locks when we are at or beyond the current EOF.
+	 * We can do an unlocked check for i_size here safely as I/O completion
+	 * can only extend EOF. Truncate is locked out at this point, so the
+	 * EOF can not move backwards, only forwards. Hence we only need to take
+	 * the slow path when we are at or beyond the current EOF.
	 */
-	if (iocb->ki_pos <= i_size_read(inode))
-		goto out;
-
-	spin_lock(&ip->i_flags_lock);
-	isize = i_size_read(inode);
-	if (iocb->ki_pos > isize) {
-		spin_unlock(&ip->i_flags_lock);
-
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			return -EAGAIN;
-
-		if (!drained_dio) {
-			if (*iolock == XFS_IOLOCK_SHARED) {
-				xfs_iunlock(ip, *iolock);
-				*iolock = XFS_IOLOCK_EXCL;
-				xfs_ilock(ip, *iolock);
-				iov_iter_reexpand(from, count);
-			}
-			/*
-			 * We now have an IO submission barrier in place, but
-			 * AIO can do EOF updates during IO completion and hence
-			 * we now need to wait for all of them to drain. Non-AIO
-			 * DIO will have drained before we are given the
-			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
-			 * no-op.
-			 */
-			inode_dio_wait(inode);
-			drained_dio = true;
+	if (iocb->ki_pos > i_size_read(inode)) {
+		error = xfs_file_write_zero_eof(iocb, from, iolock, count,
+				&drained_dio);
+		if (error == 1)
			goto restart;
-		}
-
-		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
-		error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
		if (error)
			return error;
-	} else
-		spin_unlock(&ip->i_flags_lock);
+	}
 
-out:
	return kiocb_modified(iocb);
 }
 
@@ -975,6 +975,7 @@ xfs_buffered_write_iomap_begin(
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;
	unsigned int		lockmode = XFS_ILOCK_EXCL;
+	unsigned int		iomap_flags = 0;
	u64			seq;
 
	if (xfs_is_shutdown(mp))
@@ -1145,6 +1146,11 @@ xfs_buffered_write_iomap_begin(
		}
	}
 
+	/*
+	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+	 * them out if the write happens to fail.
+	 */
+	iomap_flags |= IOMAP_F_NEW;
	if (allocfork == XFS_COW_FORK) {
		error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
				end_fsb - offset_fsb, prealloc_blocks, &cmap,
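Setting IOMAP_F_NEW in the new iomap_flags variable, rather than hardcoding it at one return, is what lets the allocation path fall through into the shared found_imap exit below. A trimmed-down sketch of that accumulate-then-stamp shape; the flag values here are illustrative, not the kernel's:

	#include <stdio.h>

	#define F_NEW		(1u << 0)
	#define F_SHARED	(1u << 1)

	static unsigned int build_mapping(int newly_allocated, int cow)
	{
		unsigned int iomap_flags = 0;

		if (newly_allocated)
			iomap_flags |= F_NEW;	/* punch these out on write failure */
		if (cow)
			iomap_flags |= F_SHARED;

		/* single exit: every case funnels through the same return */
		return iomap_flags;
	}

	int main(void)
	{
		printf("flags = %#x\n", build_mapping(1, 1));
		return 0;
	}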
@@ -1162,19 +1168,11 @@ xfs_buffered_write_iomap_begin(
	if (error)
		goto out_unlock;
 
-	/*
-	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
-	 * them out if the write happens to fail.
-	 */
-	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
-	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
 
 found_imap:
-	seq = xfs_iomap_inode_sequence(ip, 0);
+	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
	xfs_iunlock(ip, lockmode);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
 
 convert_delay:
	xfs_iunlock(ip, lockmode);
@@ -1188,20 +1186,20 @@ convert_delay:
	return 0;
 
 found_cow:
-	seq = xfs_iomap_inode_sequence(ip, 0);
	if (imap.br_startoff <= offset_fsb) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0,
+				xfs_iomap_inode_sequence(ip, 0));
		if (error)
			goto out_unlock;
-		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
-		xfs_iunlock(ip, lockmode);
-		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
-				IOMAP_F_SHARED, seq);
+	} else {
+		xfs_trim_extent(&cmap, offset_fsb,
+				imap.br_startoff - offset_fsb);
	}
 
-	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
+	iomap_flags |= IOMAP_F_SHARED;
+	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
	xfs_iunlock(ip, lockmode);
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, iomap_flags, seq);
 
 out_unlock:
	xfs_iunlock(ip, lockmode);
@@ -1215,7 +1213,10 @@ xfs_buffered_write_delalloc_punch(
	loff_t			length,
	struct iomap		*iomap)
 {
-	xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, offset + length);
+	xfs_bmap_punch_delalloc_range(XFS_I(inode),
+			(iomap->flags & IOMAP_F_SHARED) ?
+				XFS_COW_FORK : XFS_DATA_FORK,
+			offset, offset + length);
 }
 
 static int
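The punch callback can no longer assume the data fork: a mapping stamped IOMAP_F_SHARED was allocated out of the COW fork, so that is where its delalloc blocks live. A standalone sketch of the flag-to-fork mapping with simplified values:

	#include <stdio.h>

	#define F_SHARED	(1u << 1)	/* stand-in for IOMAP_F_SHARED */
	enum fork { DATA_FORK, COW_FORK };

	static enum fork punch_fork(unsigned int iomap_flags)
	{
		/* shared mappings came from the COW fork */
		return (iomap_flags & F_SHARED) ? COW_FORK : DATA_FORK;
	}

	int main(void)
	{
		printf("plain write punches fork %d\n", punch_fork(0));
		printf("COW write punches fork %d\n", punch_fork(F_SHARED));
		return 0;
	}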
@@ -1227,8 +1228,30 @@ xfs_buffered_write_iomap_end(
	unsigned		flags,
	struct iomap		*iomap)
 {
-	iomap_file_buffered_write_punch_delalloc(inode, offset, length, written,
-			flags, iomap, &xfs_buffered_write_delalloc_punch);
+	loff_t			start_byte, end_byte;
+
+	/* If we didn't reserve the blocks, we're not allowed to punch them. */
+	if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
+		return 0;
+
+	/* Nothing to do if we've written the entire delalloc extent */
+	start_byte = iomap_last_written_block(inode, offset, written);
+	end_byte = round_up(offset + length, i_blocksize(inode));
+	if (start_byte >= end_byte)
+		return 0;
+
+	/* For zeroing operations the callers already hold invalidate_lock. */
+	if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
+		rwsem_assert_held_write(&inode->i_mapping->invalidate_lock);
+		iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
+				iomap, xfs_buffered_write_delalloc_punch);
+	} else {
+		filemap_invalidate_lock(inode->i_mapping);
+		iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
+				iomap, xfs_buffered_write_delalloc_punch);
+		filemap_invalidate_unlock(inode->i_mapping);
+	}
+
	return 0;
 }
 
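The iomap_end change above encodes the new locking rule: IOMAP_ZERO and IOMAP_UNSHARE callers already hold invalidate_lock exclusively, so only the plain buffered-write path takes it here. A standalone sketch of the same split, using a pthread rwlock as a stand-in for the mapping's invalidate_lock and simplified flag/function names:

	#include <pthread.h>
	#include <stdio.h>

	#define OP_ZERO_OR_UNSHARE	(1u << 0)

	static pthread_rwlock_t invalidate_lock = PTHREAD_RWLOCK_INITIALIZER;

	static void delalloc_release(void)
	{
		/* must run under the write-held invalidation lock */
		puts("releasing delalloc extents");
	}

	static void iomap_end(unsigned int flags)
	{
		if (flags & OP_ZERO_OR_UNSHARE) {
			/* caller already holds the lock exclusively */
			delalloc_release();
		} else {
			pthread_rwlock_wrlock(&invalidate_lock);
			delalloc_release();
			pthread_rwlock_unlock(&invalidate_lock);
		}
	}

	int main(void)
	{
		pthread_rwlock_wrlock(&invalidate_lock);	/* as a zeroing caller would */
		iomap_end(OP_ZERO_OR_UNSHARE);
		pthread_rwlock_unlock(&invalidate_lock);

		iomap_end(0);	/* plain buffered write takes the lock itself */
		return 0;
	}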
@@ -1435,6 +1458,8 @@ xfs_zero_range(
 {
	struct inode		*inode = VFS_I(ip);
 
+	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
	if (IS_DAX(inode))
		return dax_zero_range(inode, pos, len, did_zero,
				&xfs_dax_write_iomap_ops);
@@ -256,6 +256,20 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
	return &i->iomap;
 }
 
+/*
+ * Return the file offset for the first unchanged block after a short write.
+ *
+ * If nothing was written, round @pos down to point at the first block in
+ * the range, else round up to include the partially written block.
+ */
+static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos,
+		ssize_t written)
+{
+	if (unlikely(!written))
+		return round_down(pos, i_blocksize(inode));
+	return round_up(pos + written, i_blocksize(inode));
+}
+
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
		const struct iomap_ops *ops, void *private);
 int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
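The rounding in iomap_last_written_block() is easy to check in isolation: a partial write rounds up past the last block it touched, while a write that made no progress rounds down to release the whole range. A standalone check of that logic, assuming a 4096-byte block size for the demo:

	#include <stdio.h>

	#define BLOCKSIZE 4096LL

	static long long round_down_ll(long long x) { return x - (x % BLOCKSIZE); }
	static long long round_up_ll(long long x)
	{
		return round_down_ll(x + BLOCKSIZE - 1);
	}

	static long long last_written_block(long long pos, long long written)
	{
		if (written == 0)
			return round_down_ll(pos);	/* release from the range start */
		return round_up_ll(pos + written);	/* keep the partial block */
	}

	int main(void)
	{
		/* short write: 6000 of 20000 bytes landed starting at offset 5000 */
		printf("first unchanged block: %lld\n", last_written_block(5000, 6000));
		/* nothing written: release from the block containing offset 5000 */
		printf("first unchanged block: %lld\n", last_written_block(5000, 0));
		return 0;
	}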
@@ -276,9 +290,9 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
 
 typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
		struct iomap *iomap);
-void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos,
-		loff_t length, ssize_t written, unsigned flag,
-		struct iomap *iomap, iomap_punch_t punch);
+void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
+		loff_t end_byte, unsigned flags, struct iomap *iomap,
+		iomap_punch_t punch);
 
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len, const struct iomap_ops *ops);