56bec294de
The extent allocation tree maintains a reference count and full back reference information for every extent allocated in the filesystem. For subvolume and snapshot trees, every time a block goes through COW, the new copy of the block adds a reference on every block it points to. If a btree node points to 150 leaves, then the COW code needs to go and add backrefs on 150 different extents, which might be spread all over the extent allocation tree. These updates currently happen during btrfs_cow_block, and most COWs happen during btrfs_search_slot. btrfs_search_slot has locks held on both the parent and the node we are COWing, and so we really want to avoid IO during the COW if we can. This commit adds an rbtree of pending reference count updates and extent allocations. The tree is ordered by byte number of the extent and byte number of the parent for the back reference. The tree allows us to: 1) Modify back references in something close to disk order, reducing seeks 2) Significantly reduce the number of modifications made as block pointers are balanced around 3) Do all of the extent insertion and back reference modifications outside of the performance critical btrfs_search_slot code. #3 has the added benefit of greatly reducing the btrfs stack footprint. The extent allocation tree modifications are done without the deep (and somewhat recursive) call chains used in the past. These delayed back reference updates must be done before the transaction commits, and so the rbtree is tied to the transaction. Throttling is implemented to help keep the queue of backrefs at a reasonable size. Since there was a similar mechanism in place for the extent tree extents, that is removed and replaced by the delayed reference tree. Yan Zheng <yan.zheng@oracle.com> helped review and fixup this code. Signed-off-by: Chris Mason <chris.mason@oracle.com>
110 lines
3.4 KiB
C
110 lines
3.4 KiB
C
/*
|
|
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License v2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#ifndef __BTRFS_TRANSACTION__
|
|
#define __BTRFS_TRANSACTION__
|
|
#include "btrfs_inode.h"
|
|
#include "delayed-ref.h"
|
|
|
|
struct btrfs_transaction {
|
|
u64 transid;
|
|
unsigned long num_writers;
|
|
unsigned long num_joined;
|
|
int in_commit;
|
|
int use_count;
|
|
int commit_done;
|
|
int blocked;
|
|
struct list_head list;
|
|
struct extent_io_tree dirty_pages;
|
|
unsigned long start_time;
|
|
wait_queue_head_t writer_wait;
|
|
wait_queue_head_t commit_wait;
|
|
struct list_head pending_snapshots;
|
|
struct btrfs_delayed_ref_root delayed_refs;
|
|
};
|
|
|
|
struct btrfs_trans_handle {
|
|
u64 transid;
|
|
unsigned long blocks_reserved;
|
|
unsigned long blocks_used;
|
|
struct btrfs_transaction *transaction;
|
|
u64 block_group;
|
|
u64 alloc_exclude_start;
|
|
u64 alloc_exclude_nr;
|
|
unsigned long delayed_ref_updates;
|
|
};
|
|
|
|
struct btrfs_pending_snapshot {
|
|
struct dentry *dentry;
|
|
struct btrfs_root *root;
|
|
char *name;
|
|
struct btrfs_key root_key;
|
|
struct list_head list;
|
|
};
|
|
|
|
struct btrfs_dirty_root {
|
|
struct list_head list;
|
|
struct btrfs_root *root;
|
|
struct btrfs_root *latest_root;
|
|
};
|
|
|
|
static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
|
|
struct inode *inode)
|
|
{
|
|
trans->block_group = BTRFS_I(inode)->block_group;
|
|
}
|
|
|
|
static inline void btrfs_update_inode_block_group(
|
|
struct btrfs_trans_handle *trans,
|
|
struct inode *inode)
|
|
{
|
|
BTRFS_I(inode)->block_group = trans->block_group;
|
|
}
|
|
|
|
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
|
|
struct inode *inode)
|
|
{
|
|
BTRFS_I(inode)->last_trans = trans->transaction->transid;
|
|
}
|
|
|
|
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root);
|
|
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
|
|
int num_blocks);
|
|
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
|
|
int num_blocks);
|
|
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
|
|
int num_blocks);
|
|
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root);
|
|
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root);
|
|
|
|
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
|
|
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
|
|
int btrfs_clean_old_snapshots(struct btrfs_root *root);
|
|
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root);
|
|
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root);
|
|
void btrfs_throttle(struct btrfs_root *root);
|
|
int btrfs_record_root_in_trans(struct btrfs_root *root);
|
|
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
|
|
struct extent_io_tree *dirty_pages);
|
|
#endif
|