for-6.11-rc6-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmbYn2YACgkQxWXV+ddt WDum5Q//Topfw8yGOMpSUajZ7n4Iy81CknH6GnV2r0qj/0vK4XZ8a8PHpJPLn0gc neTGo62vfaQ1HstKPvXWMJkoew5cL+khXW6zaEnieVLvlrVGD9i5NgtmgiC/kK00 Pwj8h2MFhdrXEJEXdk0g9IVaGRs78lruGuc0eI0sGESMbZdQ4OsLToU4zFCqgb6b LZrHENyTIoYjiqMPYrZh4X4TxDV9lVw3XTbebB9vZPsC1Bj0H8uZ3rMU5hS7VboH e/c7qmJWs/Gq0CNCGvQmguO2eK29NVE24XHoLgsTwpYFSXW1VOLNUlihgkP1aZsB Zh7ETuMah7M/yjwXNASdM2mJcO3yVRryUZXApJFCdHTRz12aIcCYfIRCZZ+GQuQg gZaRgEW4kpTOmdUY3weeJcmfgQiHem0+cOy4dC6ykvNpfCwj3HcOft3U5qaR3C6p c+Gd4lurnWn3CtPmYZRQ/7g9vvKth7jXvBMTkPoS4KyaTe5Kk+ph9h7uUtyHZpQP /zxaZlYNMX1C+4atVTpQhRTBqHEbiK9BLDErWkqG0Dv6x/NJv3iDSAX+S64WWJwK +LkHW7m+5HnCQi++8uxE+V1dWispczbgIcMEmPoyQhhEVKHg9dx9EItr8MEvNpyd YIV6qfGoQTWzTPGbApLxe94WOm4tpcaFUbyaWjTrXexsYK6lo2I= =LHQV -----END PGP SIGNATURE----- Merge tag 'for-6.11-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: - followup fix for direct io and fsync under some conditions, reported by QEMU users - fix a potential leak when disabling quotas while some extent tracking work can still happen - in zoned mode handle unexpected change of zone write pointer in RAID1-like block groups, turn the zones to read-only * tag 'for-6.11-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: fix race between direct IO write and fsync when using same fd btrfs: zoned: handle broken write pointer on zones btrfs: qgroup: don't use extent changeset when not needed
This commit is contained in:
commit
1263a7bf8a
@ -459,7 +459,6 @@ struct btrfs_file_private {
|
||||
void *filldir_buf;
|
||||
u64 last_index;
|
||||
struct extent_state *llseek_cached_state;
|
||||
bool fsync_skip_inode_lock;
|
||||
};
|
||||
|
||||
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
|
||||
|
@ -864,13 +864,6 @@ again:
|
||||
if (IS_ERR_OR_NULL(dio)) {
|
||||
ret = PTR_ERR_OR_ZERO(dio);
|
||||
} else {
|
||||
struct btrfs_file_private stack_private = { 0 };
|
||||
struct btrfs_file_private *private;
|
||||
const bool have_private = (file->private_data != NULL);
|
||||
|
||||
if (!have_private)
|
||||
file->private_data = &stack_private;
|
||||
|
||||
/*
|
||||
* If we have a synchronous write, we must make sure the fsync
|
||||
* triggered by the iomap_dio_complete() call below doesn't
|
||||
@ -879,13 +872,10 @@ again:
|
||||
* partial writes due to the input buffer (or parts of it) not
|
||||
* being already faulted in.
|
||||
*/
|
||||
private = file->private_data;
|
||||
private->fsync_skip_inode_lock = true;
|
||||
ASSERT(current->journal_info == NULL);
|
||||
current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB;
|
||||
ret = iomap_dio_complete(dio);
|
||||
private->fsync_skip_inode_lock = false;
|
||||
|
||||
if (!have_private)
|
||||
file->private_data = NULL;
|
||||
current->journal_info = NULL;
|
||||
}
|
||||
|
||||
/* No increment (+=) because iomap returns a cumulative value. */
|
||||
|
@ -1603,7 +1603,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
|
||||
*/
|
||||
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
{
|
||||
struct btrfs_file_private *private = file->private_data;
|
||||
struct dentry *dentry = file_dentry(file);
|
||||
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
|
||||
struct btrfs_root *root = inode->root;
|
||||
@ -1613,7 +1612,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
int ret = 0, err;
|
||||
u64 len;
|
||||
bool full_sync;
|
||||
const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false);
|
||||
bool skip_ilock = false;
|
||||
|
||||
if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) {
|
||||
skip_ilock = true;
|
||||
current->journal_info = NULL;
|
||||
lockdep_assert_held(&inode->vfs_inode.i_rwsem);
|
||||
}
|
||||
|
||||
trace_btrfs_sync_file(file, datasync);
|
||||
|
||||
|
@ -4346,10 +4346,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
|
||||
int ret;
|
||||
|
||||
if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
|
||||
extent_changeset_init(&changeset);
|
||||
return clear_record_extent_bits(&inode->io_tree, start,
|
||||
start + len - 1,
|
||||
EXTENT_QGROUP_RESERVED, &changeset);
|
||||
EXTENT_QGROUP_RESERVED, NULL);
|
||||
}
|
||||
|
||||
/* In release case, we shouldn't have @reserved */
|
||||
|
@ -27,6 +27,12 @@ struct btrfs_root_item;
|
||||
struct btrfs_root;
|
||||
struct btrfs_path;
|
||||
|
||||
/*
|
||||
* Signal that a direct IO write is in progress, to avoid deadlock for sync
|
||||
* direct IO writes when fsync is called during the direct IO write path.
|
||||
*/
|
||||
#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1)
|
||||
|
||||
/* Radix-tree tag for roots that are part of the transaction. */
|
||||
#define BTRFS_ROOT_TRANS_TAG 0
|
||||
|
||||
|
@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
|
||||
|
||||
if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
|
||||
btrfs_err(bg->fs_info,
|
||||
"zoned: cannot recover write pointer for zone %llu",
|
||||
@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
|
||||
}
|
||||
|
||||
bg->alloc_offset = zone_info[0].alloc_offset;
|
||||
bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* In case a device is missing we have a cap of 0, so don't use it. */
|
||||
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
|
||||
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
|
||||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
|
||||
@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
|
||||
if (test_bit(0, active))
|
||||
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
|
||||
}
|
||||
/* In case a device is missing we have a cap of 0, so don't use it. */
|
||||
bg->zone_capacity = min_not_zero(zone_info[0].capacity,
|
||||
zone_info[1].capacity);
|
||||
}
|
||||
|
||||
if (zone_info[0].alloc_offset != WP_MISSING_DEV)
|
||||
@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
unsigned long *active = NULL;
|
||||
u64 last_alloc = 0;
|
||||
u32 num_sequential = 0, num_conventional = 0;
|
||||
u64 profile;
|
||||
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return 0;
|
||||
@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
}
|
||||
}
|
||||
|
||||
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
|
||||
profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
|
||||
switch (profile) {
|
||||
case 0: /* single */
|
||||
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
|
||||
break;
|
||||
@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
|
||||
profile != BTRFS_BLOCK_GROUP_RAID10) {
|
||||
/*
|
||||
* Detected broken write pointer. Make this block group
|
||||
* unallocatable by setting the allocation pointer at the end of
|
||||
* allocatable region. Relocating this block group will fix the
|
||||
* mismatch.
|
||||
*
|
||||
* Currently, we cannot handle RAID0 or RAID10 case like this
|
||||
* because we don't have a proper zone_capacity value. But,
|
||||
* reading from this block group won't work anyway because of a missing
|
||||
* stripe.
|
||||
*/
|
||||
cache->alloc_offset = cache->zone_capacity;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
/* Reject non SINGLE data profiles without RST */
|
||||
if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
|
||||
|
Loading…
Reference in New Issue
Block a user