1

bcachefs: bch2_folio_reservation_get_partial() is now better behaved

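bch2_folio_reservation_get_partial(), on partial success, will now
return a reservation that's aligned to the filesystem blocksize: the
reserved length is cut back so that it ends on a block boundary,
instead of ending wherever the old halving retry loop happened to stop.

To support this, __bch2_disk_reservation_add() takes a new
BCH_DISK_RESERVATION_PARTIAL flag, which lets it reserve fewer sectors
than requested when the full amount isn't available.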

This is a partial fix for fstests generic/299 - fio verify is badly
behaved in the presence of short writes that aren't aligned to its
blocksize.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-10-17 01:05:17 -04:00
parent 81e0b6c7c1
commit 335d318ef5
3 changed files with 57 additions and 32 deletions

View File

@ -1160,11 +1160,11 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c)
#define SECTORS_CACHE 1024 #define SECTORS_CACHE 1024
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, int flags) u64 sectors, enum bch_reservation_flags flags)
{ {
struct bch_fs_pcpu *pcpu; struct bch_fs_pcpu *pcpu;
u64 old, get; u64 old, get;
s64 sectors_available; u64 sectors_available;
int ret; int ret;
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
@ -1202,6 +1202,9 @@ recalculate:
percpu_u64_set(&c->pcpu->sectors_available, 0); percpu_u64_set(&c->pcpu->sectors_available, 0);
sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
sectors = min(sectors, sectors_available);
if (sectors <= sectors_available || if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) { (flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available, atomic64_set(&c->sectors_available,

View File

@ -344,14 +344,16 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
} }
} }
#define BCH_DISK_RESERVATION_NOFAIL (1 << 0) enum bch_reservation_flags {
BCH_DISK_RESERVATION_NOFAIL = 1 << 0,
BCH_DISK_RESERVATION_PARTIAL = 1 << 1,
};
int __bch2_disk_reservation_add(struct bch_fs *, int __bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *,
struct disk_reservation *, u64, enum bch_reservation_flags);
u64, int);
static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, int flags) u64 sectors, enum bch_reservation_flags flags)
{ {
#ifdef __KERNEL__ #ifdef __KERNEL__
u64 old, new; u64 old, new;

View File

@ -399,14 +399,17 @@ void bch2_folio_reservation_put(struct bch_fs *c,
bch2_quota_reservation_put(c, inode, &res->quota); bch2_quota_reservation_put(c, inode, &res->quota);
} }
int bch2_folio_reservation_get(struct bch_fs *c, static int __bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct folio *folio, struct folio *folio,
struct bch2_folio_reservation *res, struct bch2_folio_reservation *res,
size_t offset, size_t len) size_t offset, size_t len,
bool partial)
{ {
struct bch_folio *s = bch2_folio_create(folio, 0); struct bch_folio *s = bch2_folio_create(folio, 0);
unsigned i, disk_sectors = 0, quota_sectors = 0; unsigned i, disk_sectors = 0, quota_sectors = 0;
struct disk_reservation disk_res = {};
size_t reserved = len;
int ret; int ret;
if (!s) if (!s)
@ -422,23 +425,56 @@ int bch2_folio_reservation_get(struct bch_fs *c,
} }
if (disk_sectors) { if (disk_sectors) {
ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors,
partial ? BCH_DISK_RESERVATION_PARTIAL : 0);
if (unlikely(ret)) if (unlikely(ret))
return ret; return ret;
if (unlikely(disk_res.sectors != disk_sectors)) {
disk_sectors = quota_sectors = 0;
for (i = round_down(offset, block_bytes(c)) >> 9;
i < round_up(offset + len, block_bytes(c)) >> 9;
i++) {
disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
if (disk_sectors > disk_res.sectors) {
/*
* Make sure to get a reservation that's
* aligned to the filesystem blocksize:
*/
unsigned reserved_offset = round_down(i << 9, block_bytes(c));
reserved = clamp(reserved_offset, offset, offset + len) - offset;
if (!reserved) {
bch2_disk_reservation_put(c, &disk_res);
return -BCH_ERR_ENOSPC_disk_reservation;
}
break;
}
quota_sectors += s->s[i].state == SECTOR_unallocated;
}
}
} }
if (quota_sectors) { if (quota_sectors) {
ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true); ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
if (unlikely(ret)) { if (unlikely(ret)) {
struct disk_reservation tmp = { .sectors = disk_sectors }; bch2_disk_reservation_put(c, &disk_res);
res->disk.sectors -= disk_sectors;
bch2_disk_reservation_put(c, &tmp);
return ret; return ret;
} }
} }
return 0; res->disk.sectors += disk_res.sectors;
return partial ? reserved : 0;
}
int bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
{
return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
} }
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c, ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
@ -447,23 +483,7 @@ ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
struct bch2_folio_reservation *res, struct bch2_folio_reservation *res,
size_t offset, size_t len) size_t offset, size_t len)
{ {
size_t l, reserved = 0; return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true);
int ret;
while ((l = len - reserved)) {
while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
return reserved ?: ret;
len = reserved + l;
l /= 2;
}
offset += l;
reserved += l;
}
return reserved;
} }
static void bch2_clear_folio_bits(struct folio *folio) static void bch2_clear_folio_bits(struct folio *folio)