linux/fs/xfs/scrub/newbt.h

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_NEWBT_H__
#define __XFS_SCRUB_NEWBT_H__

struct xfs_alloc_arg;

struct xrep_newbt_resv {
	/* Link to list of extents that we've reserved. */
	struct list_head	list;

	struct xfs_perag	*pag;

	/* Auto-freeing this reservation if we don't commit. */
	struct xfs_alloc_autoreap autoreap;

	/* AG block of the extent we reserved. */
	xfs_agblock_t		agbno;

	/* Length of the reservation. */
	xfs_extlen_t		len;

	/* How much of this reservation has been used. */
	xfs_extlen_t		used;
};

struct xrep_newbt {
	struct xfs_scrub	*sc;

	/* Custom allocation function, or NULL for xfs_alloc_vextent */
	int			(*alloc_vextent)(struct xfs_scrub *sc,
						 struct xfs_alloc_arg *args,
						 xfs_fsblock_t alloc_hint);

	/* List of extents that we've reserved. */
	struct list_head	resv_list;

	/* Fake root for new btree. */
	union {
		struct xbtree_afakeroot	afake;
		struct xbtree_ifakeroot	ifake;
	};

	/* rmap owner of these blocks */
	struct xfs_owner_info	oinfo;

	/* btree geometry for the bulk loader */
	struct xfs_btree_bload	bload;

	/* Allocation hint */
	xfs_fsblock_t		alloc_hint;

	/* per-ag reservation type */
	enum xfs_ag_resv_type	resv;
};

void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct xfs_scrub *sc);
void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
		enum xfs_ag_resv_type resv);
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
		int whichfork, const struct xfs_owner_info *oinfo);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
int xrep_newbt_add_extent(struct xrep_newbt *xnr, struct xfs_perag *pag,
		xfs_agblock_t agbno, xfs_extlen_t len);
void xrep_newbt_cancel(struct xrep_newbt *xnr);
int xrep_newbt_commit(struct xrep_newbt *xnr);
int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
		union xfs_btree_ptr *ptr);
unsigned int xrep_newbt_unused_blocks(struct xrep_newbt *xnr);

#endif /* __XFS_SCRUB_NEWBT_H__ */
xfs: implement block reservation accounting for btrees we're staging Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. As for the particular choice of lowspace thresholds and btree block slack factors -- at this point one could say that the thresholds in online repair come from bulkload_estimate_ag_slack in xfs_repair[1]. But that's not the entire story, since the offline btree rebuilding code in xfs_repair was merged as a retroport of the online btree code in this patchset! Before xfs_btree_staging.[ch] came along, xfs_repair determined the slack factor (aka the number of slots to leave unfilled in each new btree block) via open-coded logic in repair/phase5.c[2]. At that point the slack factors were arbitrary quantities per btree. The rmapbt automatically left 10 slots free; everything else left zero. That had a noticeable effect on performance straight after mounting because adding records to /any/ btree would result in splits. A few years ago when this patch was first written, Dave and I decided that repair should generate btree blocks that were 75% full unless space was tight, in which case it should try to fill the blocks to nearly full. We defined tight as ~10% free to avoid repair failures but settled on 3/32 (~9%) to avoid div64. IOWs, we mostly pulled the thresholds out of thin air. We've been QAing with those geometry numbers ever since. ;) Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114 Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349 Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> 2023-12-06 19:40:59 -07:00			`// SPDX-License-Identifier: GPL-2.0-or-later`
			`/*`
			`* Copyright (C) 2022-2023 Oracle. All Rights Reserved.`
			`* Author: Darrick J. Wong <djwong@kernel.org>`
			`*/`
			`#ifndef __XFS_SCRUB_NEWBT_H__`
			`#define __XFS_SCRUB_NEWBT_H__`

xfs: repair the rmapbt Rebuild the reverse mapping btree from all primary metadata. This first patch establishes the bare mechanics of finding records and putting together a new ondisk tree; more complex pieces are needed to make it work properly. Link: Documentation/filesystems/xfs-online-fsck-design.rst Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> 2024-02-22 13:43:38 -07:00			`struct xfs_alloc_arg;`

xfs: implement block reservation accounting for btrees we're staging Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. As for the particular choice of lowspace thresholds and btree block slack factors -- at this point one could say that the thresholds in online repair come from bulkload_estimate_ag_slack in xfs_repair[1]. But that's not the entire story, since the offline btree rebuilding code in xfs_repair was merged as a retroport of the online btree code in this patchset! Before xfs_btree_staging.[ch] came along, xfs_repair determined the slack factor (aka the number of slots to leave unfilled in each new btree block) via open-coded logic in repair/phase5.c[2]. At that point the slack factors were arbitrary quantities per btree. The rmapbt automatically left 10 slots free; everything else left zero. That had a noticeable effect on performance straight after mounting because adding records to /any/ btree would result in splits. A few years ago when this patch was first written, Dave and I decided that repair should generate btree blocks that were 75% full unless space was tight, in which case it should try to fill the blocks to nearly full. We defined tight as ~10% free to avoid repair failures but settled on 3/32 (~9%) to avoid div64. IOWs, we mostly pulled the thresholds out of thin air. We've been QAing with those geometry numbers ever since. ;) Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114 Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349 Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> 2023-12-06 19:40:59 -07:00			`struct xrep_newbt_resv {`
			`/* Link to list of extents that we've reserved. */`
			`struct list_head list;`

			`struct xfs_perag *pag;`

xfs: log EFIs for all btree blocks being used to stage a btree We need to log EFIs for every extent that we allocate for the purpose of staging a new btree so that if we fail then the blocks will be freed during log recovery. Use the autoreaping mechanism provided by the previous patch to attach paused freeing work to the scrub transaction. We can then mark the EFIs stale if we decide to commit the new btree, or we can unpause the EFIs if we decide to abort the repair. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> 2023-12-06 19:41:00 -07:00			`/* Auto-freeing this reservation if we don't commit. */`
			`struct xfs_alloc_autoreap autoreap;`

xfs: implement block reservation accounting for btrees we're staging Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. As for the particular choice of lowspace thresholds and btree block slack factors -- at this point one could say that the thresholds in online repair come from bulkload_estimate_ag_slack in xfs_repair[1]. But that's not the entire story, since the offline btree rebuilding code in xfs_repair was merged as a retroport of the online btree code in this patchset! Before xfs_btree_staging.[ch] came along, xfs_repair determined the slack factor (aka the number of slots to leave unfilled in each new btree block) via open-coded logic in repair/phase5.c[2]. At that point the slack factors were arbitrary quantities per btree. The rmapbt automatically left 10 slots free; everything else left zero. That had a noticeable effect on performance straight after mounting because adding records to /any/ btree would result in splits. A few years ago when this patch was first written, Dave and I decided that repair should generate btree blocks that were 75% full unless space was tight, in which case it should try to fill the blocks to nearly full. We defined tight as ~10% free to avoid repair failures but settled on 3/32 (~9%) to avoid div64. IOWs, we mostly pulled the thresholds out of thin air. We've been QAing with those geometry numbers ever since. ;) Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114 Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349 Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> 2023-12-06 19:40:59 -07:00			`/* AG block of the extent we reserved. */`
			`xfs_agblock_t agbno;`

			`/* Length of the reservation. */`
			`xfs_extlen_t len;`

			`/* How much of this reservation has been used. */`
			`xfs_extlen_t used;`
			`};`

			`struct xrep_newbt {`
			`struct xfs_scrub *sc;`

xfs: repair the rmapbt Rebuild the reverse mapping btree from all primary metadata. This first patch establishes the bare mechanics of finding records and putting together a new ondisk tree; more complex pieces are needed to make it work properly. Link: Documentation/filesystems/xfs-online-fsck-design.rst Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> 2024-02-22 13:43:38 -07:00			`/* Custom allocation function, or NULL for xfs_alloc_vextent */`
			`int (alloc_vextent)(struct xfs_scrub sc,`
			`struct xfs_alloc_arg *args,`
			`xfs_fsblock_t alloc_hint);`

xfs: implement block reservation accounting for btrees we're staging Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. As for the particular choice of lowspace thresholds and btree block slack factors -- at this point one could say that the thresholds in online repair come from bulkload_estimate_ag_slack in xfs_repair[1]. But that's not the entire story, since the offline btree rebuilding code in xfs_repair was merged as a retroport of the online btree code in this patchset! Before xfs_btree_staging.[ch] came along, xfs_repair determined the slack factor (aka the number of slots to leave unfilled in each new btree block) via open-coded logic in repair/phase5.c[2]. At that point the slack factors were arbitrary quantities per btree. The rmapbt automatically left 10 slots free; everything else left zero. That had a noticeable effect on performance straight after mounting because adding records to /any/ btree would result in splits. A few years ago when this patch was first written, Dave and I decided that repair should generate btree blocks that were 75% full unless space was tight, in which case it should try to fill the blocks to nearly full. We defined tight as ~10% free to avoid repair failures but settled on 3/32 (~9%) to avoid div64. IOWs, we mostly pulled the thresholds out of thin air. We've been QAing with those geometry numbers ever since. ;) Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114 Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349 Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> 2023-12-06 19:40:59 -07:00			`/* List of extents that we've reserved. */`
			`struct list_head resv_list;`

			`/* Fake root for new btree. */`
			`union {`
			`struct xbtree_afakeroot afake;`
			`struct xbtree_ifakeroot ifake;`
			`};`

			`/* rmap owner of these blocks */`
			`struct xfs_owner_info oinfo;`

			`/* btree geometry for the bulk loader */`
			`struct xfs_btree_bload bload;`

			`/* Allocation hint */`
			`xfs_fsblock_t alloc_hint;`

			`/* per-ag reservation type */`
			`enum xfs_ag_resv_type resv;`
			`};`

			`void xrep_newbt_init_bare(struct xrep_newbt xnr, struct xfs_scrub sc);`
			`void xrep_newbt_init_ag(struct xrep_newbt xnr, struct xfs_scrub sc,`
			`const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,`
			`enum xfs_ag_resv_type resv);`
			`int xrep_newbt_init_inode(struct xrep_newbt xnr, struct xfs_scrub sc,`
			`int whichfork, const struct xfs_owner_info *oinfo);`
			`int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);`
xfs: repair free space btrees Rebuild the free space btrees from the gaps in the rmap btree. Refer to the case study in Documentation/filesystems/xfs-online-fsck-design.rst for more details. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> 2023-12-15 11:03:32 -07:00			`int xrep_newbt_add_extent(struct xrep_newbt xnr, struct xfs_perag pag,`
			`xfs_agblock_t agbno, xfs_extlen_t len);`
xfs: implement block reservation accounting for btrees we're staging Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. As for the particular choice of lowspace thresholds and btree block slack factors -- at this point one could say that the thresholds in online repair come from bulkload_estimate_ag_slack in xfs_repair[1]. But that's not the entire story, since the offline btree rebuilding code in xfs_repair was merged as a retroport of the online btree code in this patchset! Before xfs_btree_staging.[ch] came along, xfs_repair determined the slack factor (aka the number of slots to leave unfilled in each new btree block) via open-coded logic in repair/phase5.c[2]. At that point the slack factors were arbitrary quantities per btree. The rmapbt automatically left 10 slots free; everything else left zero. That had a noticeable effect on performance straight after mounting because adding records to /any/ btree would result in splits. A few years ago when this patch was first written, Dave and I decided that repair should generate btree blocks that were 75% full unless space was tight, in which case it should try to fill the blocks to nearly full. We defined tight as ~10% free to avoid repair failures but settled on 3/32 (~9%) to avoid div64. IOWs, we mostly pulled the thresholds out of thin air. We've been QAing with those geometry numbers ever since. ;) Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114 Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349 Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> 2023-12-06 19:40:59 -07:00			`void xrep_newbt_cancel(struct xrep_newbt *xnr);`
			`int xrep_newbt_commit(struct xrep_newbt *xnr);`
			`int xrep_newbt_claim_block(struct xfs_btree_cur cur, struct xrep_newbt xnr,`
			`union xfs_btree_ptr *ptr);`
xfs: repair free space btrees Rebuild the free space btrees from the gaps in the rmap btree. Refer to the case study in Documentation/filesystems/xfs-online-fsck-design.rst for more details. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> 2023-12-15 11:03:32 -07:00			`unsigned int xrep_newbt_unused_blocks(struct xrep_newbt *xnr);`
xfs: implement block reservation accounting for btrees we're staging Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. As for the particular choice of lowspace thresholds and btree block slack factors -- at this point one could say that the thresholds in online repair come from bulkload_estimate_ag_slack in xfs_repair[1]. But that's not the entire story, since the offline btree rebuilding code in xfs_repair was merged as a retroport of the online btree code in this patchset! Before xfs_btree_staging.[ch] came along, xfs_repair determined the slack factor (aka the number of slots to leave unfilled in each new btree block) via open-coded logic in repair/phase5.c[2]. At that point the slack factors were arbitrary quantities per btree. The rmapbt automatically left 10 slots free; everything else left zero. That had a noticeable effect on performance straight after mounting because adding records to /any/ btree would result in splits. A few years ago when this patch was first written, Dave and I decided that repair should generate btree blocks that were 75% full unless space was tight, in which case it should try to fill the blocks to nearly full. We defined tight as ~10% free to avoid repair failures but settled on 3/32 (~9%) to avoid div64. IOWs, we mostly pulled the thresholds out of thin air. We've been QAing with those geometry numbers ever since. ;) Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114 Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349 Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com> 2023-12-06 19:40:59 -07:00
			`#endif /* __XFS_SCRUB_NEWBT_H__ */`