1

bcachefs: fsck: Improve hash_check_key()

hash_check_key() checks and repairs the hash table btrees: dirents and
xattrs are open addressing hash tables.

We recently had a corruption reported where the hash type on an inode
somehow got flipped, which made the existing dirents invisible and
allowed new ones to be created with the same name.

Now, hash_check_key() can repair duplicates: it will delete one of them,
if it has an xattr or dangling dirent, but if it has two valid dirents
one of them gets renamed.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-10-18 00:22:09 -04:00
parent dc96656b20
commit bc6d2d1041
3 changed files with 190 additions and 46 deletions

View File

@ -250,13 +250,6 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
return ret; return ret;
} }
static void dirent_copy_target(struct bkey_i_dirent *dst,
struct bkey_s_c_dirent src)
{
dst->v.d_inum = src.v->d_inum;
dst->v.d_type = src.v->d_type;
}
int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
struct bkey_s_c_dirent d, subvol_inum *target) struct bkey_s_c_dirent d, subvol_inum *target)
{ {

View File

@ -34,6 +34,13 @@ static inline unsigned dirent_val_u64s(unsigned len)
int bch2_dirent_read_target(struct btree_trans *, subvol_inum, int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *); struct bkey_s_c_dirent, subvol_inum *);
static inline void dirent_copy_target(struct bkey_i_dirent *dst,
struct bkey_s_c_dirent src)
{
dst->v.d_inum = src.v->d_inum;
dst->v.d_type = src.v->d_type;
}
int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32, int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
const struct bch_hash_info *, u8, const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *, const struct qstr *, u64, u64 *,

View File

@ -929,35 +929,138 @@ static int get_visible_inodes(struct btree_trans *trans,
return ret; return ret;
} }
static int hash_redo_key(struct btree_trans *trans, static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c k)
{ {
struct bkey_i *delete; if (d.v->d_type == DT_SUBVOL) {
struct bkey_i *tmp; u32 snap;
u64 inum;
int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
return !ret;
} else {
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
int ret = bkey_err(k);
if (ret)
return ret;
delete = bch2_trans_kmalloc(trans, sizeof(*delete)); ret = bkey_is_inode(k.k);
if (IS_ERR(delete)) bch2_trans_iter_exit(trans, &iter);
return PTR_ERR(delete); return ret;
}
}
tmp = bch2_bkey_make_mut_noupdate(trans, k); /*
if (IS_ERR(tmp)) * Prefer to delete the first one, since that will be the one at the wrong
return PTR_ERR(tmp); * offset:
* return value: 0 -> delete k1, 1 -> delete k2
*/
static int hash_pick_winner(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c k1,
struct bkey_s_c k2)
{
if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
!memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
return 0;
bkey_init(&delete->k); switch (desc.btree_id) {
delete->k.p = k_iter->pos; case BTREE_ID_dirents: {
return bch2_btree_iter_traverse(k_iter) ?: int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1));
bch2_trans_update(trans, k_iter, delete, 0) ?: if (ret < 0)
bch2_hash_set_in_snapshot(trans, desc, hash_info, return ret;
(subvol_inum) { 0, k.k->p.inode }, if (!ret)
k.k->p.snapshot, tmp, return 0;
STR_HASH_must_create|
BTREE_UPDATE_internal_snapshot_node) ?: ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2));
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret < 0)
return ret;
if (!ret)
return 1;
return 2;
}
default:
return 0;
}
}
static int fsck_update_backpointers(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_i *new)
{
if (new->k.type != KEY_TYPE_dirent)
return 0;
struct bkey_i_dirent *d = bkey_i_to_dirent(new);
struct inode_walker target = inode_walker_init();
int ret = 0;
if (d->v.d_type == DT_SUBVOL) {
BUG();
} else {
ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
if (ret)
goto err;
darray_for_each(target.inodes, i) {
i->inode.bi_dir_offset = d->k.p.offset;
ret = __bch2_fsck_write_inode(trans, &i->inode);
if (ret)
goto err;
}
}
err:
inode_walker_exit(&target);
return ret;
}
static int fsck_rename_dirent(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c_dirent old)
{
struct qstr old_name = bch2_dirent_get_name(old);
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
bkey_dirent_init(&new->k_i);
dirent_copy_target(new, old);
new->k.p = old.k->p;
for (unsigned i = 0; i < 1000; i++) {
unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
old_name.len, old_name.name, i);
unsigned u64s = BKEY_U64s + dirent_val_u64s(len);
if (u64s > U8_MAX)
return -EINVAL;
new->k.u64s = u64s;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
(subvol_inum) { 0, old.k->p.inode },
old.k->p.snapshot, &new->k_i,
BTREE_UPDATE_internal_snapshot_node);
if (!bch2_err_matches(ret, EEXIST))
break;
}
if (ret)
return ret;
return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
} }
static int hash_check_key(struct btree_trans *trans, static int hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc, const struct bch_hash_desc desc,
struct bch_hash_info *hash_info, struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k) struct btree_iter *k_iter, struct bkey_s_c hash_k)
@ -986,16 +1089,9 @@ static int hash_check_key(struct btree_trans *trans,
if (bkey_eq(k.k->p, hash_k.k->p)) if (bkey_eq(k.k->p, hash_k.k->p))
break; break;
if (fsck_err_on(k.k->type == desc.key_type && if (k.k->type == desc.key_type &&
!desc.cmp_bkey(k, hash_k), !desc.cmp_bkey(k, hash_k))
trans, hash_table_key_duplicate, goto duplicate_entries;
"duplicate hash table keys:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k),
buf.buf))) {
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
break;
}
if (bkey_deleted(k.k)) { if (bkey_deleted(k.k)) {
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
@ -1008,18 +1104,66 @@ out:
return ret; return ret;
bad_hash: bad_hash:
if (fsck_err(trans, hash_table_key_wrong_offset, if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf), (printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
bch_err_fn(c, ret); if (IS_ERR(new))
return PTR_ERR(new);
k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
(subvol_inum) { 0, hash_k.k->p.inode },
hash_k.k->p.snapshot, new,
STR_HASH_must_create|
BTREE_ITER_with_updates|
BTREE_UPDATE_internal_snapshot_node);
ret = bkey_err(k);
if (ret) if (ret)
return ret; goto out;
ret = -BCH_ERR_transaction_restart_nested; if (k.k)
goto duplicate_entries;
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter,
BTREE_UPDATE_internal_snapshot_node) ?:
fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
} }
fsck_err: fsck_err:
goto out; goto out;
duplicate_entries:
ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
if (ret < 0)
goto out;
if (!fsck_err(trans, hash_table_key_duplicate,
"duplicate hash table keys%s:\n%s",
ret != 2 ? "" : ", both point to valid inodes",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k),
prt_newline(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf)))
goto out;
switch (ret) {
case 0:
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
break;
case 1:
ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
break;
case 2:
ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
goto out;
}
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
} }
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
@ -2336,7 +2480,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode); *hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false; dir->first_this_inode = false;
ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
if (ret < 0) if (ret < 0)
goto err; goto err;
if (ret) { if (ret) {
@ -2450,7 +2594,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode); *hash_info = bch2_hash_info_init(c, &i->inode);
inode->first_this_inode = false; inode->first_this_inode = false;
ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
bch_err_fn(c, ret); bch_err_fn(c, ret);
return ret; return ret;
} }