From 768ac63924775d9fe2e76fbb254704d5ee3bcb85 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 14 Feb 2019 18:38:52 -0500 Subject: [PATCH] bcachefs: Add a mechanism for blocking the journal Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/btree_gc.c | 12 ++--- fs/bcachefs/buckets.c | 94 +++++++++++++++++++--------------- fs/bcachefs/buckets.h | 9 +--- fs/bcachefs/buckets_types.h | 28 +++++----- fs/bcachefs/chardev.c | 6 +-- fs/bcachefs/journal.c | 44 ++++++++++++++-- fs/bcachefs/journal.h | 3 ++ fs/bcachefs/journal_types.h | 3 ++ fs/bcachefs/recovery.c | 2 +- fs/bcachefs/replicas.c | 8 +-- fs/bcachefs/super-io.c | 4 +- fs/bcachefs/sysfs.c | 14 ++--- 13 files changed, 138 insertions(+), 91 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index f40fca9328f9..ba0640e3f981 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -724,7 +724,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head, static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) { u64 stranded = c->write_points_nr * c->bucket_size_max; - u64 free = bch2_fs_sectors_free(c); + u64 free = bch2_fs_usage_read_short(c).free; return stranded * factor > free; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 922d34abc675..5091966b7b54 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -612,11 +612,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) struct bch_fs_usage *src = (void *) bch2_acc_percpu_u64s((void *) c->usage[1], nr); - copy_fs_field(s.hidden, "hidden"); - copy_fs_field(s.data, "data"); - copy_fs_field(s.cached, "cached"); - copy_fs_field(s.reserved, "reserved"); - copy_fs_field(s.nr_inodes, "nr_inodes"); + copy_fs_field(hidden, "hidden"); + copy_fs_field(data, "data"); + copy_fs_field(cached, "cached"); + copy_fs_field(reserved, "reserved"); + copy_fs_field(nr_inodes, "nr_inodes"); for (i = 0; i < 
BCH_REPLICAS_MAX; i++) copy_fs_field(persistent_reserved[i], @@ -629,7 +629,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) bch2_replicas_entry_to_text(&PBUF(buf), e); - copy_fs_field(data[i], "%s", buf); + copy_fs_field(replicas[i], "%s", buf); } } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3286ee26f7e2..ac54d82f9e11 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -124,7 +124,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c) usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr); for (i = 0; i < BCH_REPLICAS_MAX; i++) - usage->s.reserved += usage->persistent_reserved[i]; + usage->reserved += usage->persistent_reserved[i]; for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = @@ -133,10 +133,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c) switch (e->data_type) { case BCH_DATA_BTREE: case BCH_DATA_USER: - usage->s.data += usage->data[i]; + usage->data += usage->replicas[i]; break; case BCH_DATA_CACHED: - usage->s.cached += usage->data[i]; + usage->cached += usage->replicas[i]; break; } } @@ -144,21 +144,16 @@ void bch2_fs_usage_initialize(struct bch_fs *c) percpu_up_write(&c->mark_lock); } -#define bch2_usage_read_raw(_stats) \ -({ \ - typeof(*this_cpu_ptr(_stats)) _acc; \ - \ - memset(&_acc, 0, sizeof(_acc)); \ - acc_u64s_percpu((u64 *) &_acc, \ - (u64 __percpu *) _stats, \ - sizeof(_acc) / sizeof(u64)); \ - \ - _acc; \ -}) - struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca) { - return bch2_usage_read_raw(ca->usage[0]); + struct bch_dev_usage ret; + + memset(&ret, 0, sizeof(ret)); + acc_u64s_percpu((u64 *) &ret, + (u64 __percpu *) ca->usage[0], + sizeof(ret) / sizeof(u64)); + + return ret; } struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c) @@ -198,27 +193,44 @@ static u64 avail_factor(u64 r) return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); } -u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage) +u64 
bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage) { - return min(fs_usage.s.hidden + - fs_usage.s.data + - reserve_factor(fs_usage.s.reserved + - fs_usage.s.online_reserved), + return min(fs_usage->hidden + + fs_usage->data + + reserve_factor(fs_usage->reserved + + fs_usage->online_reserved), c->capacity); } +static struct bch_fs_usage_short +__bch2_fs_usage_read_short(struct bch_fs *c) +{ + struct bch_fs_usage_short ret; + u64 data, reserved; + + ret.capacity = c->capacity - + percpu_u64_get(&c->usage[0]->hidden); + + data = percpu_u64_get(&c->usage[0]->data); + reserved = percpu_u64_get(&c->usage[0]->reserved) + + percpu_u64_get(&c->usage[0]->online_reserved); + + ret.used = min(ret.capacity, data + reserve_factor(reserved)); + ret.free = ret.capacity - ret.used; + + ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes); + + return ret; +} + struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *c) { - struct bch_fs_usage_summarized usage = - bch2_usage_read_raw(&c->usage[0]->s); struct bch_fs_usage_short ret; - ret.capacity = READ_ONCE(c->capacity) - usage.hidden; - ret.used = min(ret.capacity, usage.data + - reserve_factor(usage.reserved + - usage.online_reserved)); - ret.nr_inodes = usage.nr_inodes; + percpu_down_read(&c->mark_lock); + ret = __bch2_fs_usage_read_short(c); + percpu_up_read(&c->mark_lock); return ret; } @@ -257,7 +269,7 @@ int bch2_fs_usage_apply(struct bch_fs *c, struct bch_fs_usage *fs_usage, struct disk_reservation *disk_res) { - s64 added = fs_usage->s.data + fs_usage->s.reserved; + s64 added = fs_usage->data + fs_usage->reserved; s64 should_not_have_added; int ret = 0; @@ -277,7 +289,7 @@ int bch2_fs_usage_apply(struct bch_fs *c, if (added > 0) { disk_res->sectors -= added; - fs_usage->s.online_reserved -= added; + fs_usage->online_reserved -= added; } preempt_disable(); @@ -295,7 +307,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage, int nr, s64 size) { if (type == BCH_DATA_SB || type == 
BCH_DATA_JOURNAL) - fs_usage->s.hidden += size; + fs_usage->hidden += size; dev_usage->buckets[type] += nr; } @@ -381,10 +393,10 @@ static inline void update_replicas(struct bch_fs *c, BUG_ON(!sectors); if (r->data_type == BCH_DATA_CACHED) - fs_usage->s.cached += sectors; + fs_usage->cached += sectors; else - fs_usage->s.data += sectors; - fs_usage->data[idx] += sectors; + fs_usage->data += sectors; + fs_usage->replicas[idx] += sectors; } static inline void update_cached_sectors(struct bch_fs *c, @@ -911,9 +923,9 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, fs_usage, journal_seq, flags, gc); case KEY_TYPE_inode: if (inserting) - fs_usage->s.nr_inodes++; + fs_usage->nr_inodes++; else - fs_usage->s.nr_inodes--; + fs_usage->nr_inodes--; return 0; case KEY_TYPE_reservation: { unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; @@ -922,7 +934,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, replicas = clamp_t(unsigned, replicas, 1, ARRAY_SIZE(fs_usage->persistent_reserved)); - fs_usage->s.reserved += sectors; + fs_usage->reserved += sectors; fs_usage->persistent_reserved[replicas - 1] += sectors; return 0; } @@ -1074,13 +1086,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c) { percpu_u64_set(&c->pcpu->sectors_available, 0); - return avail_factor(bch2_fs_sectors_free(c)); + return avail_factor(__bch2_fs_usage_read_short(c).free); } void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) { percpu_down_read(&c->mark_lock); - this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors); + this_cpu_sub(c->usage[0]->online_reserved, res->sectors); percpu_up_read(&c->mark_lock); res->sectors = 0; @@ -1120,7 +1132,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, out: pcpu->sectors_available -= sectors; - this_cpu_add(c->usage[0]->s.online_reserved, sectors); + this_cpu_add(c->usage[0]->online_reserved, sectors); res->sectors += sectors; preempt_enable(); 
@@ -1136,7 +1148,7 @@ recalculate: (flags & BCH_DISK_RESERVATION_NOFAIL)) { atomic64_set(&c->sectors_available, max_t(s64, 0, sectors_available - sectors)); - this_cpu_add(c->usage[0]->s.online_reserved, sectors); + this_cpu_add(c->usage[0]->online_reserved, sectors); res->sectors += sectors; ret = 0; } else { diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 973bf605cbd9..67a1d17610f3 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -225,18 +225,11 @@ static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c) struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); -u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); +u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *); struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); -static inline u64 bch2_fs_sectors_free(struct bch_fs *c) -{ - struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); - - return usage.capacity - usage.used; -} - /* key/bucket marking: */ void bch2_bucket_seq_cleanup(struct bch_fs *); diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 6eaee889f1e1..348d062dd744 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -64,35 +64,33 @@ struct bch_dev_usage { struct bch_fs_usage { /* all fields are in units of 512 byte sectors: */ - /* summarized: */ - struct bch_fs_usage_summarized { - u64 online_reserved; + u64 online_reserved; - /* fields after online_reserved are cleared/recalculated by gc: */ - u64 gc_start[0]; + /* fields after online_reserved are cleared/recalculated by gc: */ + u64 gc_start[0]; - u64 hidden; - u64 data; - u64 cached; - u64 reserved; - u64 nr_inodes; + u64 hidden; + u64 data; + u64 cached; + u64 reserved; + u64 nr_inodes; - /* XXX: add stats for compression ratio */ + /* XXX: add stats for compression ratio */ #if 0 - u64 uncompressed; - u64 compressed; + u64 uncompressed; + u64 compressed; #endif - } s; /* broken out: */ u64 
persistent_reserved[BCH_REPLICAS_MAX]; - u64 data[]; + u64 replicas[]; }; struct bch_fs_usage_short { u64 capacity; u64 used; + u64 free; u64 nr_inodes; }; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index f090b61f23f1..5ee38a6a442f 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c, if (!src) return -ENOMEM; - percpu_up_read(&c->mark_lock); + dst.used = bch2_fs_sectors_used(c, src); + dst.online_reserved = src->online_reserved; - dst.used = bch2_fs_sectors_used(c, *src); - dst.online_reserved = src->s.online_reserved; + percpu_up_read(&c->mark_lock); for (i = 0; i < BCH_REPLICAS_MAX; i++) { dst.persistent_reserved[i] = diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index dd10f1c993e5..cf4729b7a083 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -212,6 +212,9 @@ static int journal_entry_open(struct journal *j) lockdep_assert_held(&j->lock); BUG_ON(journal_entry_is_open(j)); + if (j->blocked) + return -EAGAIN; + if (!fifo_free(&j->pin)) return 0; @@ -287,7 +290,7 @@ static bool __journal_entry_close(struct journal *j) spin_unlock(&j->lock); fallthrough; case JOURNAL_UNLOCKED: - return true; + return false; } } @@ -297,6 +300,22 @@ static bool journal_entry_close(struct journal *j) return __journal_entry_close(j); } +static bool journal_quiesced(struct journal *j) +{ + bool ret; + + spin_lock(&j->lock); + ret = !j->reservations.prev_buf_unwritten && + !journal_entry_is_open(j); + __journal_entry_close(j); + return ret; +} + +static void journal_quiesce(struct journal *j) +{ + wait_event(j->wait, journal_quiesced(j)); +} + static void journal_write_work(struct work_struct *work) { struct journal *j = container_of(work, struct journal, write_work.work); @@ -722,6 +741,26 @@ int bch2_journal_flush(struct journal *j) return bch2_journal_flush_seq(j, seq); } +/* block/unblock the journal: */ + +void bch2_journal_unblock(struct journal *j) +{ + 
spin_lock(&j->lock); + j->blocked--; + spin_unlock(&j->lock); + + journal_wake(j); +} + +void bch2_journal_block(struct journal *j) +{ + spin_lock(&j->lock); + j->blocked++; + spin_unlock(&j->lock); + + journal_quiesce(j); +} + /* allocate journal on a device: */ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, @@ -931,8 +970,7 @@ void bch2_fs_journal_stop(struct journal *j) c->btree_roots_dirty) bch2_journal_meta(j); - BUG_ON(journal_entry_is_open(j) || - j->reservations.prev_buf_unwritten); + journal_quiesce(j); BUG_ON(!bch2_journal_error(j) && test_bit(JOURNAL_NOT_EMPTY, &j->flags)); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 6ef34bdae628..5290cdeab585 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -370,6 +370,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j) set_bit(JOURNAL_REPLAY_DONE, &j->flags); } +void bch2_journal_unblock(struct journal *); +void bch2_journal_block(struct journal *); + ssize_t bch2_journal_print_debug(struct journal *, char *); ssize_t bch2_journal_print_pins(struct journal *, char *); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 5f6d2320c5cd..e952eb06eff5 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -142,6 +142,9 @@ struct journal { spinlock_t lock; + /* if nonzero, we may not open a new journal entry: */ + unsigned blocked; + /* Used when waiting because the journal was full */ wait_queue_head_t wait; struct closure_waitlist async_wait; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index e28917cf2cec..5ceab8c14d72 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -83,7 +83,7 @@ static int journal_replay_entry_early(struct bch_fs *c, le64_to_cpu(u->v)); break; case FS_USAGE_INODES: - percpu_u64_set(&c->usage[0]->s.nr_inodes, + percpu_u64_set(&c->usage[0]->nr_inodes, le64_to_cpu(u->v)); break; case FS_USAGE_KEY_VERSION: diff --git a/fs/bcachefs/replicas.c 
b/fs/bcachefs/replicas.c index 6fee8fe37688..03bb6b51d15f 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -245,14 +245,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p, *dst = *src; for (src_idx = 0; src_idx < src_r->nr; src_idx++) { - if (!src->data[src_idx]) + if (!src->replicas[src_idx]) continue; dst_idx = __replicas_entry_idx(dst_r, cpu_replicas_entry(src_r, src_idx)); BUG_ON(dst_idx < 0); - dst->data[dst_idx] = src->data[src_idx]; + dst->replicas[dst_idx] = src->replicas[src_idx]; } } @@ -457,7 +457,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) if (__replicas_has_entry(&c->replicas_gc, e)) continue; - v = percpu_u64_get(&c->usage[0]->data[i]); + v = percpu_u64_get(&c->usage[0]->replicas[i]); if (!v) continue; @@ -558,7 +558,7 @@ int bch2_replicas_set_usage(struct bch_fs *c, BUG_ON(ret < 0); } - percpu_u64_set(&c->usage[0]->data[idx], sectors); + percpu_u64_set(&c->usage[0]->replicas[idx], sectors); return 0; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 0b3a761fe93e..66e174d93a9c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -930,7 +930,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, percpu_down_write(&c->mark_lock); { - u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes); + u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes); struct jset_entry_usage *u = container_of(entry, struct jset_entry_usage, entry); @@ -977,7 +977,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); - u64 sectors = percpu_u64_get(&c->usage[0]->data[i]); + u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]); struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 8ad7b6026d1b..361f7b7addcf 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c 
@@ -244,17 +244,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity); pr_buf(&out, "hidden:\t\t\t\t%llu\n", - fs_usage->s.hidden); + fs_usage->hidden); pr_buf(&out, "data:\t\t\t\t%llu\n", - fs_usage->s.data); + fs_usage->data); pr_buf(&out, "cached:\t\t\t\t%llu\n", - fs_usage->s.cached); + fs_usage->cached); pr_buf(&out, "reserved:\t\t\t%llu\n", - fs_usage->s.reserved); + fs_usage->reserved); pr_buf(&out, "nr_inodes:\t\t\t%llu\n", - fs_usage->s.nr_inodes); + fs_usage->nr_inodes); pr_buf(&out, "online reserved:\t\t%llu\n", - fs_usage->s.online_reserved); + fs_usage->online_reserved); for (i = 0; i < ARRAY_SIZE(fs_usage->persistent_reserved); @@ -270,7 +270,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) pr_buf(&out, "\t"); bch2_replicas_entry_to_text(&out, e); - pr_buf(&out, ":\t%llu\n", fs_usage->data[i]); + pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]); } percpu_up_read(&c->mark_lock);