bcachefs: Add a mechanism for blocking the journal
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
8fe826f90a
commit
768ac63924
@ -724,7 +724,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
|
||||
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
|
||||
{
|
||||
u64 stranded = c->write_points_nr * c->bucket_size_max;
|
||||
u64 free = bch2_fs_sectors_free(c);
|
||||
u64 free = bch2_fs_usage_read_short(c).free;
|
||||
|
||||
return stranded * factor > free;
|
||||
}
|
||||
|
@ -612,11 +612,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
|
||||
struct bch_fs_usage *src = (void *)
|
||||
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
|
||||
|
||||
copy_fs_field(s.hidden, "hidden");
|
||||
copy_fs_field(s.data, "data");
|
||||
copy_fs_field(s.cached, "cached");
|
||||
copy_fs_field(s.reserved, "reserved");
|
||||
copy_fs_field(s.nr_inodes, "nr_inodes");
|
||||
copy_fs_field(hidden, "hidden");
|
||||
copy_fs_field(data, "data");
|
||||
copy_fs_field(cached, "cached");
|
||||
copy_fs_field(reserved, "reserved");
|
||||
copy_fs_field(nr_inodes, "nr_inodes");
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++)
|
||||
copy_fs_field(persistent_reserved[i],
|
||||
@ -629,7 +629,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
|
||||
|
||||
bch2_replicas_entry_to_text(&PBUF(buf), e);
|
||||
|
||||
copy_fs_field(data[i], "%s", buf);
|
||||
copy_fs_field(replicas[i], "%s", buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -124,7 +124,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++)
|
||||
usage->s.reserved += usage->persistent_reserved[i];
|
||||
usage->reserved += usage->persistent_reserved[i];
|
||||
|
||||
for (i = 0; i < c->replicas.nr; i++) {
|
||||
struct bch_replicas_entry *e =
|
||||
@ -133,10 +133,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
switch (e->data_type) {
|
||||
case BCH_DATA_BTREE:
|
||||
case BCH_DATA_USER:
|
||||
usage->s.data += usage->data[i];
|
||||
usage->data += usage->replicas[i];
|
||||
break;
|
||||
case BCH_DATA_CACHED:
|
||||
usage->s.cached += usage->data[i];
|
||||
usage->cached += usage->replicas[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -144,21 +144,16 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
|
||||
percpu_up_write(&c->mark_lock);
|
||||
}
|
||||
|
||||
#define bch2_usage_read_raw(_stats) \
|
||||
({ \
|
||||
typeof(*this_cpu_ptr(_stats)) _acc; \
|
||||
\
|
||||
memset(&_acc, 0, sizeof(_acc)); \
|
||||
acc_u64s_percpu((u64 *) &_acc, \
|
||||
(u64 __percpu *) _stats, \
|
||||
sizeof(_acc) / sizeof(u64)); \
|
||||
\
|
||||
_acc; \
|
||||
})
|
||||
|
||||
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
return bch2_usage_read_raw(ca->usage[0]);
|
||||
struct bch_dev_usage ret;
|
||||
|
||||
memset(&ret, 0, sizeof(ret));
|
||||
acc_u64s_percpu((u64 *) &ret,
|
||||
(u64 __percpu *) ca->usage[0],
|
||||
sizeof(ret) / sizeof(u64));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
|
||||
@ -198,27 +193,44 @@ static u64 avail_factor(u64 r)
|
||||
return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
|
||||
}
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
|
||||
{
|
||||
return min(fs_usage.s.hidden +
|
||||
fs_usage.s.data +
|
||||
reserve_factor(fs_usage.s.reserved +
|
||||
fs_usage.s.online_reserved),
|
||||
return min(fs_usage->hidden +
|
||||
fs_usage->data +
|
||||
reserve_factor(fs_usage->reserved +
|
||||
fs_usage->online_reserved),
|
||||
c->capacity);
|
||||
}
|
||||
|
||||
/*
 * Build a condensed usage summary (capacity/used/free/nr_inodes) from the
 * per-cpu counters in c->usage[0].
 *
 * This helper takes no lock itself; the visible wrapper
 * bch2_fs_usage_read_short() calls it with c->mark_lock held for read.
 * NOTE(review): other callers presumably must hold mark_lock too - confirm.
 */
static struct bch_fs_usage_short
|
||||
__bch2_fs_usage_read_short(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage_short ret;
|
||||
u64 data, reserved;
|
||||
|
||||
|
||||
/* reported capacity excludes the "hidden" counter */
ret.capacity = c->capacity -
|
||||
percpu_u64_get(&c->usage[0]->hidden);
|
||||
|
||||
|
||||
data = percpu_u64_get(&c->usage[0]->data);
|
||||
/* reserved and online_reserved are summed before reserve_factor() is applied */
reserved = percpu_u64_get(&c->usage[0]->reserved) +
|
||||
percpu_u64_get(&c->usage[0]->online_reserved);
|
||||
|
||||
|
||||
/* used is clamped to capacity, so the subtraction for free cannot wrap */
ret.used = min(ret.capacity, data + reserve_factor(reserved));
|
||||
ret.free = ret.capacity - ret.used;
|
||||
|
||||
|
||||
ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bch_fs_usage_short
|
||||
bch2_fs_usage_read_short(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage_summarized usage =
|
||||
bch2_usage_read_raw(&c->usage[0]->s);
|
||||
struct bch_fs_usage_short ret;
|
||||
|
||||
ret.capacity = READ_ONCE(c->capacity) - usage.hidden;
|
||||
ret.used = min(ret.capacity, usage.data +
|
||||
reserve_factor(usage.reserved +
|
||||
usage.online_reserved));
|
||||
ret.nr_inodes = usage.nr_inodes;
|
||||
percpu_down_read(&c->mark_lock);
|
||||
ret = __bch2_fs_usage_read_short(c);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -257,7 +269,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct disk_reservation *disk_res)
|
||||
{
|
||||
s64 added = fs_usage->s.data + fs_usage->s.reserved;
|
||||
s64 added = fs_usage->data + fs_usage->reserved;
|
||||
s64 should_not_have_added;
|
||||
int ret = 0;
|
||||
|
||||
@ -277,7 +289,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
|
||||
|
||||
if (added > 0) {
|
||||
disk_res->sectors -= added;
|
||||
fs_usage->s.online_reserved -= added;
|
||||
fs_usage->online_reserved -= added;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
@ -295,7 +307,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
|
||||
int nr, s64 size)
|
||||
{
|
||||
if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
|
||||
fs_usage->s.hidden += size;
|
||||
fs_usage->hidden += size;
|
||||
|
||||
dev_usage->buckets[type] += nr;
|
||||
}
|
||||
@ -381,10 +393,10 @@ static inline void update_replicas(struct bch_fs *c,
|
||||
BUG_ON(!sectors);
|
||||
|
||||
if (r->data_type == BCH_DATA_CACHED)
|
||||
fs_usage->s.cached += sectors;
|
||||
fs_usage->cached += sectors;
|
||||
else
|
||||
fs_usage->s.data += sectors;
|
||||
fs_usage->data[idx] += sectors;
|
||||
fs_usage->data += sectors;
|
||||
fs_usage->replicas[idx] += sectors;
|
||||
}
|
||||
|
||||
static inline void update_cached_sectors(struct bch_fs *c,
|
||||
@ -911,9 +923,9 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_inode:
|
||||
if (inserting)
|
||||
fs_usage->s.nr_inodes++;
|
||||
fs_usage->nr_inodes++;
|
||||
else
|
||||
fs_usage->s.nr_inodes--;
|
||||
fs_usage->nr_inodes--;
|
||||
return 0;
|
||||
case KEY_TYPE_reservation: {
|
||||
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
|
||||
@ -922,7 +934,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
replicas = clamp_t(unsigned, replicas, 1,
|
||||
ARRAY_SIZE(fs_usage->persistent_reserved));
|
||||
|
||||
fs_usage->s.reserved += sectors;
|
||||
fs_usage->reserved += sectors;
|
||||
fs_usage->persistent_reserved[replicas - 1] += sectors;
|
||||
return 0;
|
||||
}
|
||||
@ -1074,13 +1086,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
|
||||
{
|
||||
percpu_u64_set(&c->pcpu->sectors_available, 0);
|
||||
|
||||
return avail_factor(bch2_fs_sectors_free(c));
|
||||
return avail_factor(__bch2_fs_usage_read_short(c).free);
|
||||
}
|
||||
|
||||
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
|
||||
{
|
||||
percpu_down_read(&c->mark_lock);
|
||||
this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors);
|
||||
this_cpu_sub(c->usage[0]->online_reserved, res->sectors);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
res->sectors = 0;
|
||||
@ -1120,7 +1132,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
|
||||
|
||||
out:
|
||||
pcpu->sectors_available -= sectors;
|
||||
this_cpu_add(c->usage[0]->s.online_reserved, sectors);
|
||||
this_cpu_add(c->usage[0]->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
|
||||
preempt_enable();
|
||||
@ -1136,7 +1148,7 @@ recalculate:
|
||||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
|
||||
atomic64_set(&c->sectors_available,
|
||||
max_t(s64, 0, sectors_available - sectors));
|
||||
this_cpu_add(c->usage[0]->s.online_reserved, sectors);
|
||||
this_cpu_add(c->usage[0]->online_reserved, sectors);
|
||||
res->sectors += sectors;
|
||||
ret = 0;
|
||||
} else {
|
||||
|
@ -225,18 +225,11 @@ static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
|
||||
|
||||
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
|
||||
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
|
||||
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
|
||||
|
||||
struct bch_fs_usage_short
|
||||
bch2_fs_usage_read_short(struct bch_fs *);
|
||||
|
||||
static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
|
||||
|
||||
return usage.capacity - usage.used;
|
||||
}
|
||||
|
||||
/* key/bucket marking: */
|
||||
|
||||
void bch2_bucket_seq_cleanup(struct bch_fs *);
|
||||
|
@ -64,35 +64,33 @@ struct bch_dev_usage {
|
||||
struct bch_fs_usage {
|
||||
/* all fields are in units of 512 byte sectors: */
|
||||
|
||||
/* summarized: */
|
||||
struct bch_fs_usage_summarized {
|
||||
u64 online_reserved;
|
||||
u64 online_reserved;
|
||||
|
||||
/* fields after online_reserved are cleared/recalculated by gc: */
|
||||
u64 gc_start[0];
|
||||
/* fields after online_reserved are cleared/recalculated by gc: */
|
||||
u64 gc_start[0];
|
||||
|
||||
u64 hidden;
|
||||
u64 data;
|
||||
u64 cached;
|
||||
u64 reserved;
|
||||
u64 nr_inodes;
|
||||
u64 hidden;
|
||||
u64 data;
|
||||
u64 cached;
|
||||
u64 reserved;
|
||||
u64 nr_inodes;
|
||||
|
||||
/* XXX: add stats for compression ratio */
|
||||
/* XXX: add stats for compression ratio */
|
||||
#if 0
|
||||
u64 uncompressed;
|
||||
u64 compressed;
|
||||
u64 uncompressed;
|
||||
u64 compressed;
|
||||
#endif
|
||||
} s;
|
||||
|
||||
/* broken out: */
|
||||
|
||||
u64 persistent_reserved[BCH_REPLICAS_MAX];
|
||||
u64 data[];
|
||||
u64 replicas[];
|
||||
};
|
||||
|
||||
/*
 * Condensed filesystem usage summary, as filled in by
 * __bch2_fs_usage_read_short().
 * NOTE(review): values are presumably in 512 byte sectors like
 * struct bch_fs_usage - confirm.
 */
struct bch_fs_usage_short {
|
||||
/* c->capacity minus the hidden counter */
u64 capacity;
|
||||
/* data + reserve_factor(reserved + online_reserved), clamped to capacity */
u64 used;
|
||||
/* capacity - used */
u64 free;
|
||||
/* copy of the nr_inodes counter */
u64 nr_inodes;
|
||||
};
|
||||
|
||||
|
@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
|
||||
if (!src)
|
||||
return -ENOMEM;
|
||||
|
||||
percpu_up_read(&c->mark_lock);
|
||||
dst.used = bch2_fs_sectors_used(c, src);
|
||||
dst.online_reserved = src->online_reserved;
|
||||
|
||||
dst.used = bch2_fs_sectors_used(c, *src);
|
||||
dst.online_reserved = src->s.online_reserved;
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
|
||||
dst.persistent_reserved[i] =
|
||||
|
@ -212,6 +212,9 @@ static int journal_entry_open(struct journal *j)
|
||||
lockdep_assert_held(&j->lock);
|
||||
BUG_ON(journal_entry_is_open(j));
|
||||
|
||||
if (j->blocked)
|
||||
return -EAGAIN;
|
||||
|
||||
if (!fifo_free(&j->pin))
|
||||
return 0;
|
||||
|
||||
@ -287,7 +290,7 @@ static bool __journal_entry_close(struct journal *j)
|
||||
spin_unlock(&j->lock);
|
||||
fallthrough;
|
||||
case JOURNAL_UNLOCKED:
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -297,6 +300,22 @@ static bool journal_entry_close(struct journal *j)
|
||||
return __journal_entry_close(j);
|
||||
}
|
||||
|
||||
/*
 * Condition helper for journal_quiesce(): returns true when there is no
 * previous journal buffer still unwritten and no journal entry is open.
 *
 * The state is sampled under j->lock *before* __journal_entry_close() is
 * called, so a call that itself closes an open entry still returns false.
 *
 * NOTE(review): j->lock is taken here but never released in this function;
 * presumably __journal_entry_close() drops it on every path (a
 * spin_unlock(&j->lock) is visible in it) - confirm.
 */
static bool journal_quiesced(struct journal *j)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = !j->reservations.prev_buf_unwritten &&
|
||||
!journal_entry_is_open(j);
|
||||
/* attempt to close the current entry; the return value is deliberately ignored */
__journal_entry_close(j);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Sleep on j->wait until journal_quiesced() reports true, i.e. until no
 * journal entry is open and no previous buffer is still unwritten.
 * The condition is re-evaluated on each wakeup of j->wait.
 */
static void journal_quiesce(struct journal *j)
|
||||
{
|
||||
wait_event(j->wait, journal_quiesced(j));
|
||||
}
|
||||
|
||||
static void journal_write_work(struct work_struct *work)
|
||||
{
|
||||
struct journal *j = container_of(work, struct journal, write_work.work);
|
||||
@ -722,6 +741,26 @@ int bch2_journal_flush(struct journal *j)
|
||||
return bch2_journal_flush_seq(j, seq);
|
||||
}
|
||||
|
||||
/* block/unblock the journal: */
|
||||
|
||||
/*
 * Drop one block reference on the journal (taken by bch2_journal_block())
 * and wake waiters. j->blocked is a counter: journal_entry_open() returns
 * -EAGAIN while it is nonzero.
 *
 * NOTE(review): there is no underflow check here; calls are presumably
 * expected to be balanced with bch2_journal_block() - confirm.
 */
void bch2_journal_unblock(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
j->blocked--;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
|
||||
/* wake after dropping the lock so waiters can retry */
journal_wake(j);
|
||||
}
|
||||
|
||||
/*
 * Block the journal: bump j->blocked under j->lock so that
 * journal_entry_open() refuses to open new entries (it returns -EAGAIN
 * while j->blocked is nonzero), then wait for the journal to quiesce.
 * Undone by bch2_journal_unblock().
 */
void bch2_journal_block(struct journal *j)
|
||||
{
|
||||
spin_lock(&j->lock);
|
||||
j->blocked++;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
|
||||
/* blocks until no entry is open and no previous buffer is unwritten */
journal_quiesce(j);
|
||||
}
|
||||
|
||||
/* allocate journal on a device: */
|
||||
|
||||
static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
||||
@ -931,8 +970,7 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
c->btree_roots_dirty)
|
||||
bch2_journal_meta(j);
|
||||
|
||||
BUG_ON(journal_entry_is_open(j) ||
|
||||
j->reservations.prev_buf_unwritten);
|
||||
journal_quiesce(j);
|
||||
|
||||
BUG_ON(!bch2_journal_error(j) &&
|
||||
test_bit(JOURNAL_NOT_EMPTY, &j->flags));
|
||||
|
@ -370,6 +370,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
|
||||
set_bit(JOURNAL_REPLAY_DONE, &j->flags);
|
||||
}
|
||||
|
||||
void bch2_journal_unblock(struct journal *);
|
||||
void bch2_journal_block(struct journal *);
|
||||
|
||||
ssize_t bch2_journal_print_debug(struct journal *, char *);
|
||||
ssize_t bch2_journal_print_pins(struct journal *, char *);
|
||||
|
||||
|
@ -142,6 +142,9 @@ struct journal {
|
||||
|
||||
spinlock_t lock;
|
||||
|
||||
/* if nonzero, we may not open a new journal entry: */
|
||||
unsigned blocked;
|
||||
|
||||
/* Used when waiting because the journal was full */
|
||||
wait_queue_head_t wait;
|
||||
struct closure_waitlist async_wait;
|
||||
|
@ -83,7 +83,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
|
||||
le64_to_cpu(u->v));
|
||||
break;
|
||||
case FS_USAGE_INODES:
|
||||
percpu_u64_set(&c->usage[0]->s.nr_inodes,
|
||||
percpu_u64_set(&c->usage[0]->nr_inodes,
|
||||
le64_to_cpu(u->v));
|
||||
break;
|
||||
case FS_USAGE_KEY_VERSION:
|
||||
|
@ -245,14 +245,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
|
||||
*dst = *src;
|
||||
|
||||
for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
|
||||
if (!src->data[src_idx])
|
||||
if (!src->replicas[src_idx])
|
||||
continue;
|
||||
|
||||
dst_idx = __replicas_entry_idx(dst_r,
|
||||
cpu_replicas_entry(src_r, src_idx));
|
||||
BUG_ON(dst_idx < 0);
|
||||
|
||||
dst->data[dst_idx] = src->data[src_idx];
|
||||
dst->replicas[dst_idx] = src->replicas[src_idx];
|
||||
}
|
||||
}
|
||||
|
||||
@ -457,7 +457,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
|
||||
if (__replicas_has_entry(&c->replicas_gc, e))
|
||||
continue;
|
||||
|
||||
v = percpu_u64_get(&c->usage[0]->data[i]);
|
||||
v = percpu_u64_get(&c->usage[0]->replicas[i]);
|
||||
if (!v)
|
||||
continue;
|
||||
|
||||
@ -558,7 +558,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
|
||||
percpu_u64_set(&c->usage[0]->data[idx], sectors);
|
||||
percpu_u64_set(&c->usage[0]->replicas[idx], sectors);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -930,7 +930,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
|
||||
percpu_down_write(&c->mark_lock);
|
||||
|
||||
{
|
||||
u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
|
||||
u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
|
||||
struct jset_entry_usage *u =
|
||||
container_of(entry, struct jset_entry_usage, entry);
|
||||
|
||||
@ -977,7 +977,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
|
||||
for (i = 0; i < c->replicas.nr; i++) {
|
||||
struct bch_replicas_entry *e =
|
||||
cpu_replicas_entry(&c->replicas, i);
|
||||
u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
|
||||
u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
|
||||
struct jset_entry_data_usage *u =
|
||||
container_of(entry, struct jset_entry_data_usage, entry);
|
||||
|
||||
|
@ -244,17 +244,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
|
||||
pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
|
||||
|
||||
pr_buf(&out, "hidden:\t\t\t\t%llu\n",
|
||||
fs_usage->s.hidden);
|
||||
fs_usage->hidden);
|
||||
pr_buf(&out, "data:\t\t\t\t%llu\n",
|
||||
fs_usage->s.data);
|
||||
fs_usage->data);
|
||||
pr_buf(&out, "cached:\t\t\t\t%llu\n",
|
||||
fs_usage->s.cached);
|
||||
fs_usage->cached);
|
||||
pr_buf(&out, "reserved:\t\t\t%llu\n",
|
||||
fs_usage->s.reserved);
|
||||
fs_usage->reserved);
|
||||
pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
|
||||
fs_usage->s.nr_inodes);
|
||||
fs_usage->nr_inodes);
|
||||
pr_buf(&out, "online reserved:\t\t%llu\n",
|
||||
fs_usage->s.online_reserved);
|
||||
fs_usage->online_reserved);
|
||||
|
||||
for (i = 0;
|
||||
i < ARRAY_SIZE(fs_usage->persistent_reserved);
|
||||
@ -270,7 +270,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
|
||||
|
||||
pr_buf(&out, "\t");
|
||||
bch2_replicas_entry_to_text(&out, e);
|
||||
pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
|
||||
pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
|
||||
}
|
||||
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
Loading…
Reference in New Issue
Block a user