e58ac1770d
Add a new enum and an xfs_dir2_format helper that returns it, so that the code can switch on the format of a directory in a single operation, and convert all callers of xfs_dir2_isblock and xfs_dir2_isleaf to it. This also removes the explicit xfs_iread_extents call in a few of the call sites, given that xfs_bmap_last_offset already takes care of it underneath. Signed-off-by: Christoph Hellwig <hch@lst.de>; Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>; Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
422 lines
10 KiB
C
422 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_dir2.h"
|
|
#include "xfs_dir2_priv.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_bmap.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_error.h"
|
|
#include "scrub/scrub.h"
|
|
#include "scrub/common.h"
|
|
#include "scrub/readdir.h"
|
|
|
|
/* Call a function for every entry in a shortform directory. */
|
|
STATIC int
|
|
xchk_dir_walk_sf(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *dp,
|
|
xchk_dirent_fn dirent_fn,
|
|
void *priv)
|
|
{
|
|
struct xfs_name name = {
|
|
.name = ".",
|
|
.len = 1,
|
|
.type = XFS_DIR3_FT_DIR,
|
|
};
|
|
struct xfs_mount *mp = dp->i_mount;
|
|
struct xfs_da_geometry *geo = mp->m_dir_geo;
|
|
struct xfs_dir2_sf_entry *sfep;
|
|
struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data;
|
|
xfs_ino_t ino;
|
|
xfs_dir2_dataptr_t dapos;
|
|
unsigned int i;
|
|
int error;
|
|
|
|
ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
|
|
ASSERT(sfp != NULL);
|
|
|
|
/* dot entry */
|
|
dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
|
|
geo->data_entry_offset);
|
|
|
|
error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
|
|
if (error)
|
|
return error;
|
|
|
|
/* dotdot entry */
|
|
dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
|
|
geo->data_entry_offset +
|
|
xfs_dir2_data_entsize(mp, sizeof(".") - 1));
|
|
ino = xfs_dir2_sf_get_parent_ino(sfp);
|
|
name.name = "..";
|
|
name.len = 2;
|
|
|
|
error = dirent_fn(sc, dp, dapos, &name, ino, priv);
|
|
if (error)
|
|
return error;
|
|
|
|
/* iterate everything else */
|
|
sfep = xfs_dir2_sf_firstentry(sfp);
|
|
for (i = 0; i < sfp->count; i++) {
|
|
dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
|
|
xfs_dir2_sf_get_offset(sfep));
|
|
ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
|
|
name.name = sfep->name;
|
|
name.len = sfep->namelen;
|
|
name.type = xfs_dir2_sf_get_ftype(mp, sfep);
|
|
|
|
error = dirent_fn(sc, dp, dapos, &name, ino, priv);
|
|
if (error)
|
|
return error;
|
|
|
|
sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Call a function for every entry in a block directory. */
|
|
STATIC int
|
|
xchk_dir_walk_block(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *dp,
|
|
xchk_dirent_fn dirent_fn,
|
|
void *priv)
|
|
{
|
|
struct xfs_mount *mp = dp->i_mount;
|
|
struct xfs_da_geometry *geo = mp->m_dir_geo;
|
|
struct xfs_buf *bp;
|
|
unsigned int off, next_off, end;
|
|
int error;
|
|
|
|
error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
|
|
if (error)
|
|
return error;
|
|
|
|
/* Walk each directory entry. */
|
|
end = xfs_dir3_data_end_offset(geo, bp->b_addr);
|
|
for (off = geo->data_entry_offset; off < end; off = next_off) {
|
|
struct xfs_name name = { };
|
|
struct xfs_dir2_data_unused *dup = bp->b_addr + off;
|
|
struct xfs_dir2_data_entry *dep = bp->b_addr + off;
|
|
xfs_ino_t ino;
|
|
xfs_dir2_dataptr_t dapos;
|
|
|
|
/* Skip an empty entry. */
|
|
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
|
|
next_off = off + be16_to_cpu(dup->length);
|
|
continue;
|
|
}
|
|
|
|
/* Otherwise, find the next entry and report it. */
|
|
next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
|
|
if (next_off > end)
|
|
break;
|
|
|
|
dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
|
|
ino = be64_to_cpu(dep->inumber);
|
|
name.name = dep->name;
|
|
name.len = dep->namelen;
|
|
name.type = xfs_dir2_data_get_ftype(mp, dep);
|
|
|
|
error = dirent_fn(sc, dp, dapos, &name, ino, priv);
|
|
if (error)
|
|
break;
|
|
}
|
|
|
|
xfs_trans_brelse(sc->tp, bp);
|
|
return error;
|
|
}
|
|
|
|
/* Read a leaf-format directory buffer. */
|
|
STATIC int
|
|
xchk_read_leaf_dir_buf(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *dp,
|
|
struct xfs_da_geometry *geo,
|
|
xfs_dir2_off_t *curoff,
|
|
struct xfs_buf **bpp)
|
|
{
|
|
struct xfs_iext_cursor icur;
|
|
struct xfs_bmbt_irec map;
|
|
struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
|
|
xfs_dablk_t last_da;
|
|
xfs_dablk_t map_off;
|
|
xfs_dir2_off_t new_off;
|
|
|
|
*bpp = NULL;
|
|
|
|
/*
|
|
* Look for mapped directory blocks at or above the current offset.
|
|
* Truncate down to the nearest directory block to start the scanning
|
|
* operation.
|
|
*/
|
|
last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
|
|
map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));
|
|
|
|
if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
|
|
return 0;
|
|
if (map.br_startoff >= last_da)
|
|
return 0;
|
|
xfs_trim_extent(&map, map_off, last_da - map_off);
|
|
|
|
/* Read the directory block of that first mapping. */
|
|
new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
|
|
if (new_off > *curoff)
|
|
*curoff = new_off;
|
|
|
|
return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
|
|
}
|
|
|
|
/* Call a function for every entry in a leaf directory. */
|
|
STATIC int
|
|
xchk_dir_walk_leaf(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *dp,
|
|
xchk_dirent_fn dirent_fn,
|
|
void *priv)
|
|
{
|
|
struct xfs_mount *mp = dp->i_mount;
|
|
struct xfs_da_geometry *geo = mp->m_dir_geo;
|
|
struct xfs_buf *bp = NULL;
|
|
xfs_dir2_off_t curoff = 0;
|
|
unsigned int offset = 0;
|
|
int error;
|
|
|
|
/* Iterate every directory offset in this directory. */
|
|
while (curoff < XFS_DIR2_LEAF_OFFSET) {
|
|
struct xfs_name name = { };
|
|
struct xfs_dir2_data_unused *dup;
|
|
struct xfs_dir2_data_entry *dep;
|
|
xfs_ino_t ino;
|
|
unsigned int length;
|
|
xfs_dir2_dataptr_t dapos;
|
|
|
|
/*
|
|
* If we have no buffer, or we're off the end of the
|
|
* current buffer, need to get another one.
|
|
*/
|
|
if (!bp || offset >= geo->blksize) {
|
|
if (bp) {
|
|
xfs_trans_brelse(sc->tp, bp);
|
|
bp = NULL;
|
|
}
|
|
|
|
error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
|
|
&bp);
|
|
if (error || !bp)
|
|
break;
|
|
|
|
/*
|
|
* Find our position in the block.
|
|
*/
|
|
offset = geo->data_entry_offset;
|
|
curoff += geo->data_entry_offset;
|
|
}
|
|
|
|
/* Skip an empty entry. */
|
|
dup = bp->b_addr + offset;
|
|
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
|
|
length = be16_to_cpu(dup->length);
|
|
offset += length;
|
|
curoff += length;
|
|
continue;
|
|
}
|
|
|
|
/* Otherwise, find the next entry and report it. */
|
|
dep = bp->b_addr + offset;
|
|
length = xfs_dir2_data_entsize(mp, dep->namelen);
|
|
|
|
dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
|
|
ino = be64_to_cpu(dep->inumber);
|
|
name.name = dep->name;
|
|
name.len = dep->namelen;
|
|
name.type = xfs_dir2_data_get_ftype(mp, dep);
|
|
|
|
error = dirent_fn(sc, dp, dapos, &name, ino, priv);
|
|
if (error)
|
|
break;
|
|
|
|
/* Advance to the next entry. */
|
|
offset += length;
|
|
curoff += length;
|
|
}
|
|
|
|
if (bp)
|
|
xfs_trans_brelse(sc->tp, bp);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Call a function for every entry in a directory.
|
|
*
|
|
* Callers must hold the ILOCK. File types are XFS_DIR3_FT_*.
|
|
*/
|
|
int
|
|
xchk_dir_walk(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *dp,
|
|
xchk_dirent_fn dirent_fn,
|
|
void *priv)
|
|
{
|
|
struct xfs_da_args args = {
|
|
.dp = dp,
|
|
.geo = dp->i_mount->m_dir_geo,
|
|
.trans = sc->tp,
|
|
.owner = dp->i_ino,
|
|
};
|
|
int error;
|
|
|
|
if (xfs_is_shutdown(dp->i_mount))
|
|
return -EIO;
|
|
|
|
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
|
|
xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
|
|
|
|
switch (xfs_dir2_format(&args, &error)) {
|
|
case XFS_DIR2_FMT_SF:
|
|
return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
|
|
case XFS_DIR2_FMT_BLOCK:
|
|
return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
|
|
case XFS_DIR2_FMT_LEAF:
|
|
case XFS_DIR2_FMT_NODE:
|
|
return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
|
|
default:
|
|
return error;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Look up the inode number for an exact name in a directory.
|
|
*
|
|
* Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. Names are not
|
|
* checked for correctness.
|
|
*/
|
|
int
|
|
xchk_dir_lookup(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *dp,
|
|
const struct xfs_name *name,
|
|
xfs_ino_t *ino)
|
|
{
|
|
struct xfs_da_args args = {
|
|
.dp = dp,
|
|
.geo = dp->i_mount->m_dir_geo,
|
|
.trans = sc->tp,
|
|
.name = name->name,
|
|
.namelen = name->len,
|
|
.filetype = name->type,
|
|
.hashval = xfs_dir2_hashname(dp->i_mount, name),
|
|
.whichfork = XFS_DATA_FORK,
|
|
.op_flags = XFS_DA_OP_OKNOENT,
|
|
.owner = dp->i_ino,
|
|
};
|
|
int error;
|
|
|
|
if (xfs_is_shutdown(dp->i_mount))
|
|
return -EIO;
|
|
|
|
/*
|
|
* A temporary directory's block headers are written with the owner
|
|
* set to sc->ip, so we must switch the owner here for the lookup.
|
|
*/
|
|
if (dp == sc->tempip)
|
|
args.owner = sc->ip->i_ino;
|
|
|
|
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
|
|
xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
|
|
|
|
error = xfs_dir_lookup_args(&args);
|
|
if (!error)
|
|
*ino = args.inumber;
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
|
|
* state. The caller may have a transaction, so we must use trylock for both
|
|
* IOLOCKs.
|
|
*/
|
|
static inline unsigned int
|
|
xchk_dir_trylock_both(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *ip)
|
|
{
|
|
if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
|
|
return 0;
|
|
|
|
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
|
|
goto parent_iolock;
|
|
|
|
xchk_ilock(sc, XFS_ILOCK_EXCL);
|
|
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
|
|
goto parent_ilock;
|
|
|
|
return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
|
|
|
|
parent_ilock:
|
|
xchk_iunlock(sc, XFS_ILOCK_EXCL);
|
|
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
|
parent_iolock:
|
|
xchk_iunlock(sc, XFS_IOLOCK_EXCL);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
|
|
* (@sc->ip) and the inode at the other end (@ip) of a directory or parent
|
|
* pointer link so that we can check that link.
|
|
*
|
|
* We do not know ahead of time that the directory tree is /not/ corrupt, so we
|
|
* cannot use the "lock two inode" functions because we do not know that there
|
|
* is not a racing thread trying to take the locks in opposite order. First
|
|
* take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
|
|
* of @ip to synchronize with the VFS. Next, take ILOCK_EXCL of the scrub
|
|
* target and @ip to synchronize with XFS.
|
|
*
|
|
* If the trylocks succeed, *lockmode will be set to the locks held for @ip;
|
|
* @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
|
|
* be returned. If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
|
|
* XCHK_TRY_HARDER was set. Returns -EINTR if the process has been killed.
|
|
*/
|
|
int
|
|
xchk_dir_trylock_for_pptrs(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *ip,
|
|
unsigned int *lockmode)
|
|
{
|
|
unsigned int nr;
|
|
int error = 0;
|
|
|
|
ASSERT(sc->ilock_flags == 0);
|
|
|
|
for (nr = 0; nr < HZ; nr++) {
|
|
*lockmode = xchk_dir_trylock_both(sc, ip);
|
|
if (*lockmode)
|
|
return 0;
|
|
|
|
if (xchk_should_terminate(sc, &error))
|
|
return error;
|
|
|
|
delay(1);
|
|
}
|
|
|
|
if (sc->flags & XCHK_TRY_HARDER) {
|
|
xchk_set_incomplete(sc);
|
|
return -ETIMEDOUT;
|
|
}
|
|
|
|
return -EDEADLOCK;
|
|
}
|