for-6.10-rc2-tag

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmZjGxEACgkQxWXV+ddt
WDt/lQ/9G5Ieozi7BoHcBTmE5BpyRXqfQ5dj9VaWFLZEH+rvdOiZlQ5QlcI1gkUg
jbSp7SB8qhrIbBwv2b58xgnGuuUxE2dd3tPP655bBhe7o4xvAKOK8XBVyVQQxvzq
eRI1J4daQC08FjlSNnHiY1ozl1OIOTSVllLBqkhkYOaBbIKbMByCVPpj4cwzqAvC
5iBCmL2RalAQ/ZasYwdBid2L2VD4dWGJ9uHY6pi+i9f/Q7nxGRlXqhHo25J5T2Gl
YE+75fS1lN9hQIkhZRlpiqOdpjJcxkkGJ0elXtRQ0t856lXUAOacIN+HW6aaLxj/
qv8tnaZO1M/Aw8t65N/8m/ktnBQEwXeX3rAcTpvaMLKAQRGP1oZdsyN5YzLpEi0h
0Y+yXyFdOYbTJBrGie+kRyh9RoRY3CLcQgRw/2u7ysRjBqdXPcaGXAH4ydE/67Qy
hdD9DVwDkjzmxh/V4o8raIM+b3Ql4FuBR0X93f8A0LeanJUlgCrc4lokgdA1tPEf
i/+epnnlEnBOpS4ht5PFn8FkpzeuKAR63AM5sKL18DYswpP5oCyyh76zMMoBh33k
JFEwVSUDW8AH6YSMAA5eLp9bGTDOGYyvNHKBRXAv5GHQkBXq4ahjZEUh2b/r1903
FzivbyRtUsxEh62SCsXCZqKlb1BhXRkhSqdbBFbYsKYmVEkylQo=
=7rUb
-----END PGP SIGNATURE-----

Merge tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

- fix handling of folio private changes. The private value holds a
  pointer to our extent buffer structure representing a metadata range.
  Release and creation of the range were not properly synchronized when
  updating the private bit, which ended up in a double folio_put,
  leading to all sorts of breakage

- fix a crash, reported as a duplicate key in metadata but actually
  caused by a race between fsync and a size-extending write. Requires a
  prealloc'ed target range + fsync and other conditions (log tree
  state, timing)

- fix a leak of qgroup extent records after transaction abort

* tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: protect folio::private when attaching extent buffer folios
  btrfs: fix leak of qgroup extent records after transaction abort
  btrfs: fix crash on racing fsync and size-extending write into prealloc
commit 07978330e6
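A note on the first fix before the diffs. The race was between releasing and creating the extent buffer that folio::private points to. The sketch below is illustrative only, assuming a hypothetical helper attach_eb_private() standing in for the real attach_extent_buffer_folio() + btrfs_folio_inc_eb_refs() pair; it shows the invariant the fix establishes:

/*
 * Sketch, not kernel code: every folio::private update for a btree folio,
 * together with the matching eb reference bump, happens in one critical
 * section under mapping->i_private_lock.
 */
static void attach_under_private_lock(struct extent_buffer *eb,
				      struct address_space *mapping,
				      struct folio *folio)
{
	spin_lock(&mapping->i_private_lock);
	/* Hypothetical helper: sets folio private and takes the eb ref. */
	attach_eb_private(eb, folio);
	spin_unlock(&mapping->i_private_lock);
}

A racing detach_extent_buffer_page() takes the same lock, so it observes either no private set (nothing to drop) or a fully attached eb with its reference held, never the half-attached state that led to the double folio_put.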
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4538,18 +4538,10 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				       struct btrfs_fs_info *fs_info)
 {
 	struct rb_node *node;
-	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 
-	delayed_refs = &trans->delayed_refs;
-
 	spin_lock(&delayed_refs->lock);
-	if (atomic_read(&delayed_refs->num_entries) == 0) {
-		spin_unlock(&delayed_refs->lock);
-		btrfs_debug(fs_info, "delayed_refs has NO entry");
-		return;
-	}
-
 	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
 		struct rb_node *n;
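This hunk is the visible half of the qgroup fix: the early return on "no delayed ref entries" also skipped cleanup that must run unconditionally when a transaction aborts, leaking the qgroup extent records. A generic sketch of that bug class, with made-up names:

/* Illustrative only: an early "nothing queued" return in a teardown
 * path silently skips cleanup that is still required. */
static void destroy_pending(struct pending_state *s)
{
	spin_lock(&s->lock);
	/*
	 * The removed lines returned here when the tree was empty, which
	 * also skipped releasing records held by the aborted transaction.
	 * Draining an empty tree is a no-op, so the check bought nothing.
	 */
	while (!list_empty(&s->entries))
		drop_entry(s, list_first_entry(&s->entries,
					       struct pending_entry, list));
	spin_unlock(&s->lock);
}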
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3689,6 +3689,8 @@ static struct extent_buffer *grab_extent_buffer(
 	struct folio *folio = page_folio(page);
 	struct extent_buffer *exists;
 
+	lockdep_assert_held(&page->mapping->i_private_lock);
+
 	/*
 	 * For subpage case, we completely rely on radix tree to ensure we
 	 * don't try to insert two ebs for the same bytenr.  So here we always
@@ -3756,13 +3758,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
  * The caller needs to free the existing folios and retry using the same order.
  */
 static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
+				      struct btrfs_subpage *prealloc,
 				      struct extent_buffer **found_eb_ret)
 {
 
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	const unsigned long index = eb->start >> PAGE_SHIFT;
-	struct folio *existing_folio;
+	struct folio *existing_folio = NULL;
 	int ret;
 
 	ASSERT(found_eb_ret);
@@ -3774,12 +3777,14 @@ retry:
 	ret = filemap_add_folio(mapping, eb->folios[i], index + i,
 				GFP_NOFS | __GFP_NOFAIL);
 	if (!ret)
-		return 0;
+		goto finish;
 
 	existing_folio = filemap_lock_folio(mapping, index + i);
 	/* The page cache only exists for a very short time, just retry. */
-	if (IS_ERR(existing_folio))
+	if (IS_ERR(existing_folio)) {
+		existing_folio = NULL;
 		goto retry;
+	}
 
 	/* For now, we should only have single-page folios for btree inode. */
 	ASSERT(folio_nr_pages(existing_folio) == 1);
@@ -3790,14 +3795,13 @@ retry:
 		return -EAGAIN;
 	}
 
-	if (fs_info->nodesize < PAGE_SIZE) {
-		/*
-		 * We're going to reuse the existing page, can drop our page
-		 * and subpage structure now.
-		 */
+finish:
+	spin_lock(&mapping->i_private_lock);
+	if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
+		/* We're going to reuse the existing page, can drop our folio now. */
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
-	} else {
+	} else if (existing_folio) {
 		struct extent_buffer *existing_eb;
 
 		existing_eb = grab_extent_buffer(fs_info,
@@ -3805,6 +3809,7 @@ retry:
 		if (existing_eb) {
 			/* The extent buffer still exists, we can use it directly. */
 			*found_eb_ret = existing_eb;
+			spin_unlock(&mapping->i_private_lock);
 			folio_unlock(existing_folio);
 			folio_put(existing_folio);
 			return 1;
@@ -3813,6 +3818,22 @@ retry:
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
 	}
+	eb->folio_size = folio_size(eb->folios[i]);
+	eb->folio_shift = folio_shift(eb->folios[i]);
+	/* Should not fail, as we have preallocated the memory. */
+	ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
+	ASSERT(!ret);
+	/*
+	 * To inform we have an extra eb under allocation, so that
+	 * detach_extent_buffer_page() won't release the folio private when the
+	 * eb hasn't been inserted into radix tree yet.
+	 *
+	 * The ref will be decreased when the eb releases the page, in
+	 * detach_extent_buffer_page(). Thus needs no special handling in the
+	 * error path.
+	 */
+	btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
+	spin_unlock(&mapping->i_private_lock);
 	return 0;
 }
 
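Taken together, the hunks above reshape attach_eb_folio_to_filemap() so that both outcomes of the filemap insertion converge on a single locked section. A condensed sketch of the resulting control flow (error paths and subpage details trimmed, reusing the names from the diff):

retry:
	ret = filemap_add_folio(mapping, eb->folios[i], index + i,
				GFP_NOFS | __GFP_NOFAIL);
	if (!ret)
		goto finish;		/* our folio went in */
	existing_folio = filemap_lock_folio(mapping, index + i);
	if (IS_ERR(existing_folio)) {
		existing_folio = NULL;	/* it vanished, try again */
		goto retry;
	}
finish:
	spin_lock(&mapping->i_private_lock);
	/* ... reuse existing_folio, or return an existing eb ... */
	ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
	ASSERT(!ret);
	btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
	spin_unlock(&mapping->i_private_lock);
	return 0;

The point of the new finish label is that attaching the eb to folio private and taking the extra reference can no longer race with a release sneaking in between the filemap insertion and the attach.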
@@ -3824,7 +3845,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	int attached = 0;
 	struct extent_buffer *eb;
 	struct extent_buffer *existing_eb = NULL;
-	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
 	bool page_contig = true;
@@ -3890,7 +3910,7 @@ reallocate:
 	for (int i = 0; i < num_folios; i++) {
 		struct folio *folio;
 
-		ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
+		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
 		if (ret > 0) {
 			ASSERT(existing_eb);
 			goto out;
@@ -3927,24 +3947,6 @@ reallocate:
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		eb->folio_size = folio_size(folio);
-		eb->folio_shift = folio_shift(folio);
-		spin_lock(&mapping->i_private_lock);
-		/* Should not fail, as we have preallocated the memory */
-		ret = attach_extent_buffer_folio(eb, folio, prealloc);
-		ASSERT(!ret);
-		/*
-		 * To inform we have extra eb under allocation, so that
-		 * detach_extent_buffer_page() won't release the folio private
-		 * when the eb hasn't yet been inserted into radix tree.
-		 *
-		 * The ref will be decreased when the eb released the page, in
-		 * detach_extent_buffer_page().
-		 * Thus needs no special handling in error path.
-		 */
-		btrfs_folio_inc_eb_refs(fs_info, folio);
-		spin_unlock(&mapping->i_private_lock);
-
 		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
 
 		/*
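These two hunks are the caller side of the same rework: alloc_extent_buffer() now hands prealloc down and drops its own attach block. Condensed, the loop becomes roughly:

	/* The attach and ref bump live inside attach_eb_folio_to_filemap()
	 * now, so the loop no longer takes mapping->i_private_lock itself. */
	for (int i = 0; i < num_folios; i++) {
		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
		if (ret > 0) {
			ASSERT(existing_eb);
			goto out;	/* another task created this eb first */
		}
		attached++;
	}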
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4860,18 +4860,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]++;
 			continue;
 		}
-		if (!dropped_extents) {
-			/*
-			 * Avoid logging extent items logged in past fsync calls
-			 * and leading to duplicate keys in the log tree.
-			 */
+		/*
+		 * Avoid overlapping items in the log tree. The first time we
+		 * get here, get rid of everything from a past fsync. After
+		 * that, if the current extent starts before the end of the last
+		 * extent we copied, truncate the last one. This can happen if
+		 * an ordered extent completion modifies the subvolume tree
+		 * while btrfs_next_leaf() has the tree unlocked.
+		 */
+		if (!dropped_extents || key.offset < truncate_offset) {
 			ret = truncate_inode_items(trans, root->log_root, inode,
-						   truncate_offset,
+						   min(key.offset, truncate_offset),
 						   BTRFS_EXTENT_DATA_KEY);
 			if (ret)
 				goto out;
 			dropped_extents = true;
 		}
+		truncate_offset = btrfs_file_extent_end(path);
 		if (ins_nr == 0)
 			start_slot = slot;
 		ins_nr++;
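Finally, the fsync fix: btrfs_log_prealloc_extents() now remembers in truncate_offset how far into the file the last copied extent reached, so overlaps introduced by a racing ordered-extent completion are truncated out of the log instead of turning into duplicate keys. A worked example with hypothetical offsets:

/*
 * The loop copies a prealloc extent covering [0, 1M), so truncate_offset
 * becomes 1M. While btrfs_next_leaf() has the subvolume tree unlocked, an
 * ordered extent completion splits that item, and the next key seen starts
 * at 512K. Since 512K < truncate_offset, the log is first truncated back
 * to min(512K, 1M) = 512K before copying, instead of inserting an item
 * that overlaps the one already logged -- the old duplicate-key crash.
 */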