xfs: deferred scrub of dirents
If the trylock-based parent pointer check fails, retain those dirents and check them at the end. This may involve dropping the locks on the file being scanned, so yay. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
61b3f0df5c
commit
b961c8bf1f
@ -24,6 +24,10 @@
|
|||||||
#include "scrub/readdir.h"
|
#include "scrub/readdir.h"
|
||||||
#include "scrub/health.h"
|
#include "scrub/health.h"
|
||||||
#include "scrub/repair.h"
|
#include "scrub/repair.h"
|
||||||
|
#include "scrub/trace.h"
|
||||||
|
#include "scrub/xfile.h"
|
||||||
|
#include "scrub/xfarray.h"
|
||||||
|
#include "scrub/xfblob.h"
|
||||||
|
|
||||||
/* Set us up to scrub directories. */
|
/* Set us up to scrub directories. */
|
||||||
int
|
int
|
||||||
@ -43,12 +47,37 @@ xchk_setup_directory(
|
|||||||
|
|
||||||
/* Directories */
|
/* Directories */
|
||||||
|
|
||||||
|
/* Deferred directory entry that we saved for later. */
|
||||||
|
struct xchk_dirent {
|
||||||
|
/* Cookie for retrieval of the dirent name. */
|
||||||
|
xfblob_cookie name_cookie;
|
||||||
|
|
||||||
|
/* Child inode number. */
|
||||||
|
xfs_ino_t ino;
|
||||||
|
|
||||||
|
/* Length of the pptr name. */
|
||||||
|
uint8_t namelen;
|
||||||
|
};
|
||||||
|
|
||||||
struct xchk_dir {
|
struct xchk_dir {
|
||||||
struct xfs_scrub *sc;
|
struct xfs_scrub *sc;
|
||||||
|
|
||||||
/* information for parent pointer validation. */
|
/* information for parent pointer validation. */
|
||||||
struct xfs_parent_rec pptr_rec;
|
struct xfs_parent_rec pptr_rec;
|
||||||
struct xfs_da_args pptr_args;
|
struct xfs_da_args pptr_args;
|
||||||
|
|
||||||
|
/* Fixed-size array of xchk_dirent structures. */
|
||||||
|
struct xfarray *dir_entries;
|
||||||
|
|
||||||
|
/* Blobs containing dirent names. */
|
||||||
|
struct xfblob *dir_names;
|
||||||
|
|
||||||
|
/* If we've cycled the ILOCK, we must revalidate deferred dirents. */
|
||||||
|
bool need_revalidate;
|
||||||
|
|
||||||
|
/* Name buffer for dirent revalidation. */
|
||||||
|
struct xfs_name xname;
|
||||||
|
uint8_t namebuf[MAXNAMELEN];
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Scrub a directory entry. */
|
/* Scrub a directory entry. */
|
||||||
@ -148,8 +177,26 @@ xchk_dir_check_pptr_fast(
|
|||||||
/* Try to lock the inode. */
|
/* Try to lock the inode. */
|
||||||
lockmode = xchk_dir_lock_child(sc, ip);
|
lockmode = xchk_dir_lock_child(sc, ip);
|
||||||
if (!lockmode) {
|
if (!lockmode) {
|
||||||
xchk_set_incomplete(sc);
|
struct xchk_dirent save_de = {
|
||||||
return -ECANCELED;
|
.namelen = name->len,
|
||||||
|
.ino = ip->i_ino,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Couldn't lock the inode, so save the dirent for later. */
|
||||||
|
trace_xchk_dir_defer(sc->ip, name, ip->i_ino);
|
||||||
|
|
||||||
|
error = xfblob_storename(sd->dir_names, &save_de.name_cookie,
|
||||||
|
name);
|
||||||
|
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
|
||||||
|
&error))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
error = xfarray_append(sd->dir_entries, &save_de);
|
||||||
|
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
|
||||||
|
&error))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = xchk_dir_parent_pointer(sd, name, ip);
|
error = xchk_dir_parent_pointer(sd, name, ip);
|
||||||
@ -865,6 +912,142 @@ out:
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Revalidate a dirent that we collected in the past but couldn't check because
|
||||||
|
* of lock contention. Returns 0 if the dirent is still valid, -ENOENT if it
|
||||||
|
* has gone away on us, or a negative errno.
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xchk_dir_revalidate_dirent(
|
||||||
|
struct xchk_dir *sd,
|
||||||
|
const struct xfs_name *xname,
|
||||||
|
xfs_ino_t ino)
|
||||||
|
{
|
||||||
|
struct xfs_scrub *sc = sd->sc;
|
||||||
|
xfs_ino_t child_ino;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look up the directory entry. If we get -ENOENT, the directory entry
|
||||||
|
* went away and there's nothing to revalidate. Return any other
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
error = xchk_dir_lookup(sc, sc->ip, xname, &child_ino);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
|
/* The inode number changed, nothing to revalidate. */
|
||||||
|
if (ino != child_ino)
|
||||||
|
return -ENOENT;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check a directory entry's parent pointers the slow way, which means we cycle
|
||||||
|
* locks a bunch and put up with revalidation until we get it done.
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xchk_dir_slow_dirent(
|
||||||
|
struct xchk_dir *sd,
|
||||||
|
struct xchk_dirent *dirent,
|
||||||
|
const struct xfs_name *xname)
|
||||||
|
{
|
||||||
|
struct xfs_scrub *sc = sd->sc;
|
||||||
|
struct xfs_inode *ip;
|
||||||
|
unsigned int lockmode;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
/* Check that the deferred dirent still exists. */
|
||||||
|
if (sd->need_revalidate) {
|
||||||
|
error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
|
||||||
|
if (error == -ENOENT)
|
||||||
|
return 0;
|
||||||
|
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
|
||||||
|
&error))
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = xchk_iget(sc, dirent->ino, &ip);
|
||||||
|
if (error == -EINVAL || error == -ENOENT) {
|
||||||
|
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we can grab both IOLOCK and ILOCK of the alleged child, we can
|
||||||
|
* proceed with the validation.
|
||||||
|
*/
|
||||||
|
lockmode = xchk_dir_lock_child(sc, ip);
|
||||||
|
if (lockmode) {
|
||||||
|
trace_xchk_dir_slowpath(sc->ip, xname, ip->i_ino);
|
||||||
|
goto check_pptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We couldn't lock the child file. Drop all the locks and try to
|
||||||
|
* get them again, one at a time.
|
||||||
|
*/
|
||||||
|
xchk_iunlock(sc, sc->ilock_flags);
|
||||||
|
sd->need_revalidate = true;
|
||||||
|
|
||||||
|
trace_xchk_dir_ultraslowpath(sc->ip, xname, ip->i_ino);
|
||||||
|
|
||||||
|
error = xchk_dir_trylock_for_pptrs(sc, ip, &lockmode);
|
||||||
|
if (error)
|
||||||
|
goto out_rele;
|
||||||
|
|
||||||
|
/* Revalidate, since we just cycled the locks. */
|
||||||
|
error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
|
||||||
|
if (error == -ENOENT) {
|
||||||
|
error = 0;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
check_pptr:
|
||||||
|
error = xchk_dir_parent_pointer(sd, xname, ip);
|
||||||
|
out_unlock:
|
||||||
|
xfs_iunlock(ip, lockmode);
|
||||||
|
out_rele:
|
||||||
|
xchk_irele(sc, ip);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check all the dirents that we deferred the first time around. */
|
||||||
|
STATIC int
|
||||||
|
xchk_dir_finish_slow_dirents(
|
||||||
|
struct xchk_dir *sd)
|
||||||
|
{
|
||||||
|
xfarray_idx_t array_cur;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
foreach_xfarray_idx(sd->dir_entries, array_cur) {
|
||||||
|
struct xchk_dirent dirent;
|
||||||
|
|
||||||
|
if (sd->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
error = xfarray_load(sd->dir_entries, array_cur, &dirent);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
|
error = xfblob_loadname(sd->dir_names, dirent.name_cookie,
|
||||||
|
&sd->xname, dirent.namelen);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
|
||||||
|
error = xchk_dir_slow_dirent(sd, &dirent, &sd->xname);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Scrub a whole directory. */
|
/* Scrub a whole directory. */
|
||||||
int
|
int
|
||||||
xchk_directory(
|
xchk_directory(
|
||||||
@ -907,11 +1090,56 @@ xchk_directory(
|
|||||||
if (!sd)
|
if (!sd)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
sd->sc = sc;
|
sd->sc = sc;
|
||||||
|
sd->xname.name = sd->namebuf;
|
||||||
|
|
||||||
|
if (xfs_has_parent(sc->mp)) {
|
||||||
|
char *descr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set up some staging memory for dirents that we can't check
|
||||||
|
* due to locking contention.
|
||||||
|
*/
|
||||||
|
descr = xchk_xfile_ino_descr(sc, "slow directory entries");
|
||||||
|
error = xfarray_create(descr, 0, sizeof(struct xchk_dirent),
|
||||||
|
&sd->dir_entries);
|
||||||
|
kfree(descr);
|
||||||
|
if (error)
|
||||||
|
goto out_sd;
|
||||||
|
|
||||||
|
descr = xchk_xfile_ino_descr(sc, "slow directory entry names");
|
||||||
|
error = xfblob_create(descr, &sd->dir_names);
|
||||||
|
kfree(descr);
|
||||||
|
if (error)
|
||||||
|
goto out_entries;
|
||||||
|
}
|
||||||
|
|
||||||
/* Look up every name in this directory by hash. */
|
/* Look up every name in this directory by hash. */
|
||||||
error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd);
|
error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd);
|
||||||
|
if (error == -ECANCELED)
|
||||||
|
error = 0;
|
||||||
|
if (error)
|
||||||
|
goto out_names;
|
||||||
|
|
||||||
|
if (xfs_has_parent(sc->mp)) {
|
||||||
|
error = xchk_dir_finish_slow_dirents(sd);
|
||||||
|
if (error == -ETIMEDOUT) {
|
||||||
|
/* Couldn't grab a lock, scrub was marked incomplete */
|
||||||
|
error = 0;
|
||||||
|
goto out_names;
|
||||||
|
}
|
||||||
|
if (error)
|
||||||
|
goto out_names;
|
||||||
|
}
|
||||||
|
|
||||||
|
out_names:
|
||||||
|
if (sd->dir_names)
|
||||||
|
xfblob_destroy(sd->dir_names);
|
||||||
|
out_entries:
|
||||||
|
if (sd->dir_entries)
|
||||||
|
xfarray_destroy(sd->dir_entries);
|
||||||
|
out_sd:
|
||||||
kvfree(sd);
|
kvfree(sd);
|
||||||
if (error && error != -ECANCELED)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
/* If the dir is clean, it is clearly not zapped. */
|
/* If the dir is clean, it is clearly not zapped. */
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "xfs_trans.h"
|
#include "xfs_trans.h"
|
||||||
#include "xfs_error.h"
|
#include "xfs_error.h"
|
||||||
#include "scrub/scrub.h"
|
#include "scrub/scrub.h"
|
||||||
|
#include "scrub/common.h"
|
||||||
#include "scrub/readdir.h"
|
#include "scrub/readdir.h"
|
||||||
|
|
||||||
/* Call a function for every entry in a shortform directory. */
|
/* Call a function for every entry in a shortform directory. */
|
||||||
@ -380,3 +381,80 @@ out_check_rval:
|
|||||||
*ino = args.inumber;
|
*ino = args.inumber;
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
|
||||||
|
* state. The caller may have a transaction, so we must use trylock for both
|
||||||
|
* IOLOCKs.
|
||||||
|
*/
|
||||||
|
static inline unsigned int
|
||||||
|
xchk_dir_trylock_both(
|
||||||
|
struct xfs_scrub *sc,
|
||||||
|
struct xfs_inode *ip)
|
||||||
|
{
|
||||||
|
if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
|
||||||
|
goto parent_iolock;
|
||||||
|
|
||||||
|
xchk_ilock(sc, XFS_ILOCK_EXCL);
|
||||||
|
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
|
||||||
|
goto parent_ilock;
|
||||||
|
|
||||||
|
return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
|
||||||
|
|
||||||
|
parent_ilock:
|
||||||
|
xchk_iunlock(sc, XFS_ILOCK_EXCL);
|
||||||
|
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
||||||
|
parent_iolock:
|
||||||
|
xchk_iunlock(sc, XFS_IOLOCK_EXCL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
|
||||||
|
* (@sc->ip) and the inode at the other end (@ip) of a directory or parent
|
||||||
|
* pointer link so that we can check that link.
|
||||||
|
*
|
||||||
|
* We do not know ahead of time that the directory tree is /not/ corrupt, so we
|
||||||
|
* cannot use the "lock two inode" functions because we do not know that there
|
||||||
|
* is not a racing thread trying to take the locks in opposite order. First
|
||||||
|
* take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
|
||||||
|
* of @ip to synchronize with the VFS. Next, take ILOCK_EXCL of the scrub
|
||||||
|
* target and @ip to synchronize with XFS.
|
||||||
|
*
|
||||||
|
* If the trylocks succeed, *lockmode will be set to the locks held for @ip;
|
||||||
|
* @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
|
||||||
|
* be returned. If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
|
||||||
|
* XCHK_TRY_HARDER was set. Returns -EINTR if the process has been killed.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xchk_dir_trylock_for_pptrs(
|
||||||
|
struct xfs_scrub *sc,
|
||||||
|
struct xfs_inode *ip,
|
||||||
|
unsigned int *lockmode)
|
||||||
|
{
|
||||||
|
unsigned int nr;
|
||||||
|
int error = 0;
|
||||||
|
|
||||||
|
ASSERT(sc->ilock_flags == 0);
|
||||||
|
|
||||||
|
for (nr = 0; nr < HZ; nr++) {
|
||||||
|
*lockmode = xchk_dir_trylock_both(sc, ip);
|
||||||
|
if (*lockmode)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (xchk_should_terminate(sc, &error))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
delay(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sc->flags & XCHK_TRY_HARDER) {
|
||||||
|
xchk_set_incomplete(sc);
|
||||||
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -EDEADLOCK;
|
||||||
|
}
|
||||||
|
@ -16,4 +16,7 @@ int xchk_dir_walk(struct xfs_scrub *sc, struct xfs_inode *dp,
|
|||||||
int xchk_dir_lookup(struct xfs_scrub *sc, struct xfs_inode *dp,
|
int xchk_dir_lookup(struct xfs_scrub *sc, struct xfs_inode *dp,
|
||||||
const struct xfs_name *name, xfs_ino_t *ino);
|
const struct xfs_name *name, xfs_ino_t *ino);
|
||||||
|
|
||||||
|
int xchk_dir_trylock_for_pptrs(struct xfs_scrub *sc, struct xfs_inode *ip,
|
||||||
|
unsigned int *lockmode);
|
||||||
|
|
||||||
#endif /* __XFS_SCRUB_READDIR_H__ */
|
#endif /* __XFS_SCRUB_READDIR_H__ */
|
||||||
|
@ -1511,6 +1511,40 @@ DEFINE_EVENT(xchk_nlinks_diff_class, name, \
|
|||||||
TP_ARGS(mp, ip, live))
|
TP_ARGS(mp, ip, live))
|
||||||
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xchk_nlinks_compare_inode);
|
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xchk_nlinks_compare_inode);
|
||||||
|
|
||||||
|
DECLARE_EVENT_CLASS(xchk_pptr_class,
|
||||||
|
TP_PROTO(struct xfs_inode *ip, const struct xfs_name *name,
|
||||||
|
xfs_ino_t far_ino),
|
||||||
|
TP_ARGS(ip, name, far_ino),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(dev_t, dev)
|
||||||
|
__field(xfs_ino_t, ino)
|
||||||
|
__field(unsigned int, namelen)
|
||||||
|
__dynamic_array(char, name, name->len)
|
||||||
|
__field(xfs_ino_t, far_ino)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->dev = ip->i_mount->m_super->s_dev;
|
||||||
|
__entry->ino = ip->i_ino;
|
||||||
|
__entry->namelen = name->len;
|
||||||
|
memcpy(__get_str(name), name, name->len);
|
||||||
|
__entry->far_ino = far_ino;
|
||||||
|
),
|
||||||
|
TP_printk("dev %d:%d ino 0x%llx name '%.*s' far_ino 0x%llx",
|
||||||
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
|
__entry->ino,
|
||||||
|
__entry->namelen,
|
||||||
|
__get_str(name),
|
||||||
|
__entry->far_ino)
|
||||||
|
)
|
||||||
|
#define DEFINE_XCHK_PPTR_EVENT(name) \
|
||||||
|
DEFINE_EVENT(xchk_pptr_class, name, \
|
||||||
|
TP_PROTO(struct xfs_inode *ip, const struct xfs_name *name, \
|
||||||
|
xfs_ino_t far_ino), \
|
||||||
|
TP_ARGS(ip, name, far_ino))
|
||||||
|
DEFINE_XCHK_PPTR_EVENT(xchk_dir_defer);
|
||||||
|
DEFINE_XCHK_PPTR_EVENT(xchk_dir_slowpath);
|
||||||
|
DEFINE_XCHK_PPTR_EVENT(xchk_dir_ultraslowpath);
|
||||||
|
|
||||||
/* repair tracepoints */
|
/* repair tracepoints */
|
||||||
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
|
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user