From dde8cb11645b7d95766dfd13f273facde27923a9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 17 Jul 2023 00:56:29 -0400
Subject: [PATCH] bcachefs: bcachefs_metadata_version_deleted_inodes

Add a new bitset btree for inodes pending deletion; this means we no
longer have to scan the full inodes btree after an unclean shutdown.

Specifically, this adds:
 - a trigger to update the deleted_inodes btree based on changes to the
   inodes btree
 - a new recovery pass
 - and check_inodes is now only a fsck pass.

Signed-off-by: Kent Overstreet
---
Review notes (not part of the commit message):
 - may_delete_deleted_inode(): only raise the "marked as clean" fsck error
   when the superblock is actually clean; previously every entry was
   skipped after an unclean shutdown, so nothing was ever deleted.
 - may_delete_deleted_inode(): release the inodes btree iterator on every
   exit path taken after a successful lookup.
 - may_delete_deleted_inode(): when a fix is declined, don't unpack a
   non-inode key and don't report a still-linked inode as deletable.
 - The leading commit id and the inode.c index line predate these changes;
   regenerate with git format-patch after applying.

 fs/bcachefs/bcachefs.h        |   1 +
 fs/bcachefs/bcachefs_format.h |   8 ++-
 fs/bcachefs/inode.c           | 153 ++++++++++++++++++++++++++++++++++
 fs/bcachefs/inode.h           |   1 +
 fs/bcachefs/recovery_types.h  |   3 +-
 5 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 87be62c27414..e1f1e8e871a8 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -453,6 +453,7 @@ enum gc_phase {
 	GC_PHASE_BTREE_backpointers,
 	GC_PHASE_BTREE_bucket_gens,
 	GC_PHASE_BTREE_snapshot_trees,
+	GC_PHASE_BTREE_deleted_inodes,
 
 	GC_PHASE_PENDING_DELETE,
 };
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b771d80f6361..5ec218ee3569 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1629,7 +1629,9 @@ struct bch_sb_field_journal_seq_blacklist {
 	x(major_minor,			BCH_VERSION(1, 0),		\
 	  0)								\
 	x(snapshot_skiplists,		BCH_VERSION(1, 1),		\
-	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))
+	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))			\
+	x(deleted_inodes,		BCH_VERSION(1, 2),		\
+	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
@@ -2251,7 +2253,9 @@ enum btree_id_flags {
 	x(bucket_gens,		14,	0,					\
 	  BIT_ULL(KEY_TYPE_bucket_gens))					\
 	x(snapshot_trees,	15,	0,					\
-	  BIT_ULL(KEY_TYPE_snapshot_tree))
+	  BIT_ULL(KEY_TYPE_snapshot_tree))					\
+	x(deleted_inodes,	16,	BTREE_ID_SNAPSHOTS,			\
+	  BIT_ULL(KEY_TYPE_set))
 
 enum btree_id {
 #define x(name, nr, ...) BTREE_ID_##name = nr,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 755cf7d177cd..294966e42850 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "btree_write_buffer.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
 #include "buckets.h"
@@ -519,6 +520,30 @@ void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 	__bch2_inode_unpacked_to_text(out, &inode);
 }
 
+/*
+ * Return the bi_flags field of an inode key of any on-disk version, or 0 if
+ * @k is not an inode key.
+ */
+static inline u64 bkey_inode_flags(struct bkey_s_c k)
+{
+	switch (k.k->type) {
+	case KEY_TYPE_inode:
+		return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+	case KEY_TYPE_inode_v2:
+		return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+	case KEY_TYPE_inode_v3:
+		return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+	default:
+		return 0;
+	}
+}
+
+/* True if @k is an inode key with BCH_INODE_UNLINKED set. */
+static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
+{
+	return bkey_inode_flags(k) & BCH_INODE_UNLINKED;
+}
+
 int bch2_trans_mark_inode(struct btree_trans *trans,
 			  enum btree_id btree_id, unsigned level,
 			  struct bkey_s_c old,
@@ -526,6 +551,8 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
 			  unsigned flags)
 {
 	int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
+	bool old_deleted = bkey_is_deleted_inode(old);
+	bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new));
 
 	if (nr) {
 		int ret = bch2_replicas_deltas_realloc(trans, 0);
@@ -537,6 +564,13 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
 		d->nr_inodes += nr;
 	}
 
+	/* Keep the deleted_inodes bitset in sync with BCH_INODE_UNLINKED: */
+	if (old_deleted != new_deleted) {
+		int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -986,3 +1020,122 @@ err:
 
 	return ret ?: -BCH_ERR_transaction_restart_nested;
 }
+
+/*
+ * Decide whether the deleted_inodes entry at @pos refers to an inode that
+ * should really be deleted.
+ *
+ * Returns 1 if the caller should delete the inode, 0 if the entry should be
+ * skipped, or a negative error code.  When fsck elects to fix an entry that
+ * doesn't match an unlinked inode, the entry is cleared from the
+ * deleted_inodes btree instead.
+ */
+static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct bch_inode_unpacked inode;
+	int ret;
+
+	if (bch2_snapshot_is_internal_node(c, pos.snapshot))
+		return 0;
+
+	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED);
+	ret = bkey_err(k);
+	if (ret)
+		return ret;
+
+	/*
+	 * Deleted inodes are expected after an unclean shutdown; they are only
+	 * an inconsistency when the superblock says we shut down cleanly, and
+	 * then we only go ahead if fsck elects to fix it:
+	 */
+	if (c->sb.clean &&
+	    !fsck_err(c, "filesystem marked as clean but have deleted inode %llu:%u",
+		      pos.offset, pos.snapshot)) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
+	if (fsck_err_on(!bkey_is_inode(k.k), c,
+			"nonexistent inode %llu:%u in deleted_inodes btree",
+			pos.offset, pos.snapshot))
+		goto delete;
+
+	/* Fix declined: don't try to unpack a key that isn't an inode */
+	if (ret)
+		goto out;
+
+	ret = bch2_inode_unpack(k, &inode);
+	if (ret)
+		goto out;
+
+	if (!(inode.bi_flags & BCH_INODE_UNLINKED)) {
+		if (fsck_err(c, "non-deleted inode %llu:%u in deleted_inodes btree",
+			     pos.offset, pos.snapshot))
+			goto delete;
+
+		/* Fix declined: leave the entry, but never delete a live inode */
+		ret = 0;
+		goto out;
+	}
+
+	ret = 1;
+out:
+fsck_err:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+delete:
+	ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+	goto out;
+}
+
+/*
+ * Walk the deleted_inodes btree and delete each inode that
+ * may_delete_deleted_inode() says should go, switching the filesystem to
+ * read-write first if necessary.  Returns a negative error code on failure.
+ */
+int bch2_delete_dead_inodes(struct bch_fs *c)
+{
+	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	ret = bch2_btree_write_buffer_flush_sync(&trans);
+	if (ret)
+		goto err;
+
+	/*
+	 * Weird transaction restart handling here because on successful delete,
+	 * bch2_inode_rm_snapshot() will return a nested transaction restart,
+	 * but we can't retry because the btree write buffer won't have been
+	 * flushed and we'd spin:
+	 */
+	for_each_btree_key(&trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+			   BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+		ret = lockrestart_do(&trans, may_delete_deleted_inode(&trans, k.k->p));
+		if (ret < 0)
+			break;
+
+		if (ret) {
+			if (!test_bit(BCH_FS_RW, &c->flags)) {
+				bch2_trans_unlock(&trans);
+				bch2_fs_lazy_rw(c);
+			}
+
+			ret = bch2_inode_rm_snapshot(&trans, k.k->p.offset, k.k->p.snapshot);
+			if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+				break;
+		}
+	}
+	bch2_trans_iter_exit(&trans, &iter);
+err:
+	bch2_trans_exit(&trans);
+
+	return ret;
+}
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 1b9dc27e82bd..22b24405649f 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -199,5 +199,6 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
 			 struct bch_inode_unpacked *);
 
 int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
+int bch2_delete_dead_inodes(struct bch_fs *);
 
 #endif /* _BCACHEFS_INODE_H */
diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h
index 377f51188a20..abf1f834ec7a 100644
--- a/fs/bcachefs/recovery_types.h
+++ b/fs/bcachefs/recovery_types.h
@@ -29,13 +29,14 @@
 	x(check_subvols,			PASS_FSCK)			\
 	x(delete_dead_snapshots,		PASS_FSCK|PASS_UNCLEAN)		\
 	x(fs_upgrade_for_subvolumes,		0)				\
-	x(check_inodes,				PASS_FSCK|PASS_UNCLEAN)		\
+	x(check_inodes,				PASS_FSCK)			\
 	x(check_extents,			PASS_FSCK)			\
 	x(check_dirents,			PASS_FSCK)			\
 	x(check_xattrs,				PASS_FSCK)			\
 	x(check_root,				PASS_FSCK)			\
 	x(check_directory_structure,		PASS_FSCK)			\
 	x(check_nlinks,				PASS_FSCK)			\
+	x(delete_dead_inodes,			PASS_FSCK|PASS_UNCLEAN)		\
 	x(fix_reflink_p,			0)				\
 
 enum bch_recovery_pass {