From 2a492ff66673c38a77d0815d67b9a8cce2ef57f8 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Tue, 15 Oct 2024 15:15:09 +0530 Subject: [PATCH 1/4] xfs: Check for delayed allocations before setting extsize Extsize should only be allowed to be set on files with no data in it. For this, we check if the files have extents but miss to check if delayed extents are present. This patch adds that check. While we are at it, also refactor this check into a helper since it's used in some other places as well like xfs_inactive() or xfs_ioctl_setattr_xflags() **Without the patch (SUCCEEDS)** $ xfs_io -c 'open -f testfile' -c 'pwrite 0 1024' -c 'extsize 65536' wrote 1024/1024 bytes at offset 0 1 KiB, 1 ops; 0.0002 sec (4.628 MiB/sec and 4739.3365 ops/sec) **With the patch (FAILS as expected)** $ xfs_io -c 'open -f testfile' -c 'pwrite 0 1024' -c 'extsize 65536' wrote 1024/1024 bytes at offset 0 1 KiB, 1 ops; 0.0002 sec (4.628 MiB/sec and 4739.3365 ops/sec) xfs_io: FS_IOC_FSSETXATTR testfile: Invalid argument Fixes: e94af02a9cd7 ("[XFS] fix old xfs_setattr mis-merge from irix; mostly harmless esp if not using xfs rt") Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: John Garry Signed-off-by: Ojaswin Mujoo Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode.h | 5 +++++ fs/xfs/xfs_ioctl.c | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bcc277fc0a83..19dcb569a3e7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1409,7 +1409,7 @@ xfs_inactive( if (S_ISREG(VFS_I(ip)->i_mode) && (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 || - ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0)) + xfs_inode_has_filedata(ip))) truncate = 1; if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 97ed912306fd..03944b6c5fba 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -292,6 +292,11 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip) return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); } +static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip) +{ + return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0; +} + /* * Check if an inode has any data in the COW fork. This might be often false * even for inodes with the reflink flag when there is no pending COW operation. diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a20d426ef021..2567fd2a0994 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -481,7 +481,7 @@ xfs_ioctl_setattr_xflags( if (rtflag != XFS_IS_REALTIME_INODE(ip)) { /* Can't change realtime flag if any extents are allocated. */ - if (ip->i_df.if_nextents || ip->i_delayed_blks) + if (xfs_inode_has_filedata(ip)) return -EINVAL; /* @@ -602,7 +602,7 @@ xfs_ioctl_setattr_check_extsize( if (!fa->fsx_valid) return 0; - if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && + if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) && XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize) return -EINVAL; From 3ef22684038aa577c10972ee9c6a2455f5fac941 Mon Sep 17 00:00:00 2001 From: Chi Zhiling Date: Fri, 25 Oct 2024 10:33:20 +0800 Subject: [PATCH 2/4] xfs: Reduce unnecessary searches when searching for the best extents Recently, we found that the CPU spent a lot of time in xfs_alloc_ag_vextent_size when the filesystem has millions of fragmented spaces. The reason is that we conducted much extra searching for extents that could not yield a better result, and these searches would cost a lot of time when there were millions of extents to search through. Even if we get the same result length, we don't switch our choice to the new one, so we can definitely terminate the search early. Since the result length cannot exceed the found length, when the found length equals the best result length we already have, we can conclude the search. We did a test in that filesystem: [root@localhost ~]# xfs_db -c freesp /dev/vdb from to extents blocks pct 1 1 215 215 0.01 2 3 994476 1988952 99.99 Before this patch: 0) | xfs_alloc_ag_vextent_size [xfs]() { 0) * 15597.94 us | } After this patch: 0) | xfs_alloc_ag_vextent_size [xfs]() { 0) 19.176 us | } Signed-off-by: Chi Zhiling Reviewed-by: Dave Chinner Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 04f64cf9777e..22bdbb3e9980 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1923,7 +1923,7 @@ restart: error = -EFSCORRUPTED; goto error0; } - if (flen < bestrlen) + if (flen <= bestrlen) break; busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); From dc60992ce76fbc2f71c2674f435ff6bde2108028 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 15:37:22 +0200 Subject: [PATCH 3/4] xfs: fix finding a last resort AG in xfs_filestream_pick_ag When the main loop in xfs_filestream_pick_ag fails to find a suitable AG it tries to just pick the online AG. But the loop for that uses args->pag as loop iterator while the later code expects pag to be set. Fix this by reusing the max_pag case for this last resort, and also add a check for impossible case of no AG just to make sure that the uninitialized pag doesn't even escape in theory. Reported-by: syzbot+4125a3c514e3436a02e6@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Tested-by: syzbot+4125a3c514e3436a02e6@syzkaller.appspotmail.com Fixes: f8f1ed1ab3baba ("xfs: return a referenced perag from filestreams allocator") Cc: # v6.3 Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_filestream.c | 23 ++++++++++++----------- fs/xfs/xfs_trace.h | 15 +++++---------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index e3aaa0555597..88bd23ce74cd 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -64,7 +64,7 @@ xfs_filestream_pick_ag( struct xfs_perag *pag; struct xfs_perag *max_pag = NULL; xfs_extlen_t minlen = *longest; - xfs_extlen_t free = 0, minfree, maxfree = 0; + xfs_extlen_t minfree, maxfree = 0; xfs_agnumber_t agno; bool first_pass = true; int err; @@ -107,7 +107,6 @@ restart: !(flags & XFS_PICK_USERDATA) || (flags & XFS_PICK_LOWSPACE))) { /* Break out, retaining the reference on the AG. */ - free = pag->pagf_freeblks; break; } } @@ -150,23 +149,25 @@ restart: * grab. */ if (!max_pag) { - for_each_perag_wrap(args->mp, 0, start_agno, args->pag) + for_each_perag_wrap(args->mp, 0, start_agno, pag) { + max_pag = pag; break; - atomic_inc(&args->pag->pagf_fstrms); - *longest = 0; - } else { - pag = max_pag; - free = maxfree; - atomic_inc(&pag->pagf_fstrms); + } + + /* Bail if there are no AGs at all to select from. */ + if (!max_pag) + return -ENOSPC; } + + pag = max_pag; + atomic_inc(&pag->pagf_fstrms); } else if (max_pag) { xfs_perag_rele(max_pag); } - trace_xfs_filestream_pick(pag, pino, free); + trace_xfs_filestream_pick(pag, pino); args->pag = pag; return 0; - } static struct xfs_inode * diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ee9f0b1f548d..fcb2bad4f76e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); TRACE_EVENT(xfs_filestream_pick, - TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free), - TP_ARGS(pag, ino, free), + TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), + TP_ARGS(pag, ino), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) @@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick, TP_fast_assign( __entry->dev = pag->pag_mount->m_super->s_dev; __entry->ino = ino; - if (pag) { - __entry->agno = pag->pag_agno; - __entry->streams = atomic_read(&pag->pagf_fstrms); - } else { - __entry->agno = NULLAGNUMBER; - __entry->streams = 0; - } - __entry->free = free; + __entry->agno = pag->pag_agno; + __entry->streams = atomic_read(&pag->pagf_fstrms); + __entry->free = pag->pagf_freeblks; ), TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d", MAJOR(__entry->dev), MINOR(__entry->dev), From 81a1e1c32ef474c20ccb9f730afe1ac25b1c62a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 15:37:23 +0200 Subject: [PATCH 4/4] xfs: streamline xfs_filestream_pick_ag Directly return the error from xfs_bmap_longest_free_extent instead of breaking from the loop and handling it there, and use a done label to directly jump to the exist when we found a suitable perag structure to reduce the indentation level and pag/max_pag check complexity in the tail of the function. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_filestream.c | 102 +++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 88bd23ce74cd..290ba8887d29 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -67,22 +67,28 @@ xfs_filestream_pick_ag( xfs_extlen_t minfree, maxfree = 0; xfs_agnumber_t agno; bool first_pass = true; - int err; /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; restart: for_each_perag_wrap(mp, start_agno, agno, pag) { + int err; + trace_xfs_filestream_scan(pag, pino); + *longest = 0; err = xfs_bmap_longest_free_extent(pag, NULL, longest); if (err) { - if (err != -EAGAIN) - break; - /* Couldn't lock the AGF, skip this AG. */ - err = 0; - continue; + if (err == -EAGAIN) { + /* Couldn't lock the AGF, skip this AG. */ + err = 0; + continue; + } + xfs_perag_rele(pag); + if (max_pag) + xfs_perag_rele(max_pag); + return err; } /* Keep track of the AG with the most free blocks. */ @@ -107,7 +113,9 @@ restart: !(flags & XFS_PICK_USERDATA) || (flags & XFS_PICK_LOWSPACE))) { /* Break out, retaining the reference on the AG. */ - break; + if (max_pag) + xfs_perag_rele(max_pag); + goto done; } } @@ -115,56 +123,44 @@ restart: atomic_dec(&pag->pagf_fstrms); } - if (err) { - xfs_perag_rele(pag); - if (max_pag) - xfs_perag_rele(max_pag); - return err; + /* + * Allow a second pass to give xfs_bmap_longest_free_extent() another + * attempt at locking AGFs that it might have skipped over before we + * fail. + */ + if (first_pass) { + first_pass = false; + goto restart; } - if (!pag) { - /* - * Allow a second pass to give xfs_bmap_longest_free_extent() - * another attempt at locking AGFs that it might have skipped - * over before we fail. - */ - if (first_pass) { - first_pass = false; - goto restart; - } - - /* - * We must be low on data space, so run a final lowspace - * optimised selection pass if we haven't already. - */ - if (!(flags & XFS_PICK_LOWSPACE)) { - flags |= XFS_PICK_LOWSPACE; - goto restart; - } - - /* - * No unassociated AGs are available, so select the AG with the - * most free space, regardless of whether it's already in use by - * another filestream. It none suit, just use whatever AG we can - * grab. - */ - if (!max_pag) { - for_each_perag_wrap(args->mp, 0, start_agno, pag) { - max_pag = pag; - break; - } - - /* Bail if there are no AGs at all to select from. */ - if (!max_pag) - return -ENOSPC; - } - - pag = max_pag; - atomic_inc(&pag->pagf_fstrms); - } else if (max_pag) { - xfs_perag_rele(max_pag); + /* + * We must be low on data space, so run a final lowspace optimised + * selection pass if we haven't already. + */ + if (!(flags & XFS_PICK_LOWSPACE)) { + flags |= XFS_PICK_LOWSPACE; + goto restart; } + /* + * No unassociated AGs are available, so select the AG with the most + * free space, regardless of whether it's already in use by another + * filestream. It none suit, just use whatever AG we can grab. + */ + if (!max_pag) { + for_each_perag_wrap(args->mp, 0, start_agno, pag) { + max_pag = pag; + break; + } + + /* Bail if there are no AGs at all to select from. */ + if (!max_pag) + return -ENOSPC; + } + + pag = max_pag; + atomic_inc(&pag->pagf_fstrms); +done: trace_xfs_filestream_pick(pag, pino); args->pag = pag; return 0;