bcachefs: Add a mechanism for blocking the journal

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Author: Kent Overstreet, 2019-02-14 18:38:52 -05:00 (committed by Kent Overstreet)
parent 8fe826f90a
commit 768ac63924
13 changed files with 138 additions and 91 deletions

fs/bcachefs/alloc_foreground.c

@@ -724,7 +724,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
 static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
 {
 	u64 stranded = c->write_points_nr * c->bucket_size_max;
-	u64 free = bch2_fs_sectors_free(c);
+	u64 free = bch2_fs_usage_read_short(c).free;

 	return stranded * factor > free;
 }
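
With bch2_fs_sectors_free() removed (see the buckets.h hunk below), callers read free space out of the summary struct instead. A minimal sketch of the new call pattern, using only what this diff shows:

	struct bch_fs_usage_short u = bch2_fs_usage_read_short(c);

	u64 free     = u.free;		/* capacity - used, computed under mark_lock */
	u64 capacity = u.capacity;	/* c->capacity minus hidden (sb/journal) sectors */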

fs/bcachefs/btree_gc.c

@@ -612,11 +612,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 		struct bch_fs_usage *src = (void *)
 			bch2_acc_percpu_u64s((void *) c->usage[1], nr);

-		copy_fs_field(s.hidden, "hidden");
-		copy_fs_field(s.data, "data");
-		copy_fs_field(s.cached, "cached");
-		copy_fs_field(s.reserved, "reserved");
-		copy_fs_field(s.nr_inodes, "nr_inodes");
+		copy_fs_field(hidden, "hidden");
+		copy_fs_field(data, "data");
+		copy_fs_field(cached, "cached");
+		copy_fs_field(reserved, "reserved");
+		copy_fs_field(nr_inodes, "nr_inodes");

 		for (i = 0; i < BCH_REPLICAS_MAX; i++)
 			copy_fs_field(persistent_reserved[i],
@@ -629,7 +629,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)

 			bch2_replicas_entry_to_text(&PBUF(buf), e);

-			copy_fs_field(data[i], "%s", buf);
+			copy_fs_field(replicas[i], "%s", buf);
 		}
 	}
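
The copy_fs_field() calls above token-paste the field name onto dst-> and src->, which is why flattening struct bch_fs_usage (dropping the s. wrapper; see the buckets_types.h hunk below) forces these renames. A hedged sketch of the helper's likely shape, following the copy_field() pattern defined earlier in bch2_gc_done() but not shown in this hunk (exact message text may differ):

	/* sketch only: pastes _f after dst-> and src->, repairs mismatches */
	#define copy_field(_f, _msg, ...)					\
		if (dst->_f != src->_f) {					\
			bch_err(c, _msg ": got %llu, should be %llu, fixing",	\
				##__VA_ARGS__, dst->_f, src->_f);		\
			dst->_f = src->_f;					\
		}
	#define copy_fs_field(_f, _msg, ...)					\
		copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)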

fs/bcachefs/buckets.c

@@ -124,7 +124,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 	usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);

 	for (i = 0; i < BCH_REPLICAS_MAX; i++)
-		usage->s.reserved += usage->persistent_reserved[i];
+		usage->reserved += usage->persistent_reserved[i];

 	for (i = 0; i < c->replicas.nr; i++) {
 		struct bch_replicas_entry *e =
@@ -133,10 +133,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 		switch (e->data_type) {
 		case BCH_DATA_BTREE:
 		case BCH_DATA_USER:
-			usage->s.data += usage->data[i];
+			usage->data += usage->replicas[i];
 			break;
 		case BCH_DATA_CACHED:
-			usage->s.cached += usage->data[i];
+			usage->cached += usage->replicas[i];
 			break;
 		}
 	}
@@ -144,21 +144,16 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 	percpu_up_write(&c->mark_lock);
 }

-#define bch2_usage_read_raw(_stats)					\
-({									\
-	typeof(*this_cpu_ptr(_stats)) _acc;				\
-									\
-	memset(&_acc, 0, sizeof(_acc));					\
-	acc_u64s_percpu((u64 *) &_acc,					\
-			(u64 __percpu *) _stats,			\
-			sizeof(_acc) / sizeof(u64));			\
-									\
-	_acc;								\
-})
-
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
 {
-	return bch2_usage_read_raw(ca->usage[0]);
+	struct bch_dev_usage ret;
+
+	memset(&ret, 0, sizeof(ret));
+	acc_u64s_percpu((u64 *) &ret,
+			(u64 __percpu *) ca->usage[0],
+			sizeof(ret) / sizeof(u64));
+
+	return ret;
 }

 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
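
The type-generic bch2_usage_read_raw() macro is open-coded into bch2_dev_usage_read(), plausibly because struct bch_fs_usage now ends in a flexible replicas[] array (see buckets_types.h below), so a sizeof-based macro can no longer size the copy for the fs case. The underlying pattern, summing per-CPU copies of a struct as a flat array of u64s, looks like this as a standalone userspace model (all names here are illustrative, not bcachefs's):

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define NR_CPUS 4

	struct dev_usage {			/* stand-in for struct bch_dev_usage */
		uint64_t buckets;
		uint64_t sectors;
	};

	/* sum per-CPU copies of a struct by treating each as a u64 array */
	static struct dev_usage usage_read(const struct dev_usage pcpu[NR_CPUS])
	{
		struct dev_usage ret;
		uint64_t *acc = (uint64_t *) &ret;
		unsigned nr = sizeof(ret) / sizeof(uint64_t);

		memset(&ret, 0, sizeof(ret));
		for (unsigned cpu = 0; cpu < NR_CPUS; cpu++) {
			const uint64_t *src = (const uint64_t *) &pcpu[cpu];

			for (unsigned i = 0; i < nr; i++)
				acc[i] += src[i];
		}
		return ret;
	}

	int main(void)
	{
		struct dev_usage pcpu[NR_CPUS] = {
			{ .buckets = 1, .sectors = 128 },
			{ .buckets = 2, .sectors = 256 },
		};
		struct dev_usage t = usage_read(pcpu);

		printf("buckets=%" PRIu64 " sectors=%" PRIu64 "\n",
		       t.buckets, t.sectors);
		return 0;
	}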
@@ -198,27 +193,44 @@ static u64 avail_factor(u64 r)
 	return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
 }

-u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
 {
-	return min(fs_usage.s.hidden +
-		   fs_usage.s.data +
-		   reserve_factor(fs_usage.s.reserved +
-				  fs_usage.s.online_reserved),
+	return min(fs_usage->hidden +
+		   fs_usage->data +
+		   reserve_factor(fs_usage->reserved +
+				  fs_usage->online_reserved),
 		   c->capacity);
 }

+static struct bch_fs_usage_short
+__bch2_fs_usage_read_short(struct bch_fs *c)
+{
+	struct bch_fs_usage_short ret;
+	u64 data, reserved;
+
+	ret.capacity = c->capacity -
+		percpu_u64_get(&c->usage[0]->hidden);
+
+	data = percpu_u64_get(&c->usage[0]->data);
+	reserved = percpu_u64_get(&c->usage[0]->reserved) +
+		percpu_u64_get(&c->usage[0]->online_reserved);
+
+	ret.used = min(ret.capacity, data + reserve_factor(reserved));
+	ret.free = ret.capacity - ret.used;
+
+	ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
+
+	return ret;
+}
+
 struct bch_fs_usage_short
 bch2_fs_usage_read_short(struct bch_fs *c)
 {
-	struct bch_fs_usage_summarized usage =
-		bch2_usage_read_raw(&c->usage[0]->s);
 	struct bch_fs_usage_short ret;

-	ret.capacity = READ_ONCE(c->capacity) - usage.hidden;
-	ret.used = min(ret.capacity, usage.data +
-		       reserve_factor(usage.reserved +
-				      usage.online_reserved));
-	ret.nr_inodes = usage.nr_inodes;
+	percpu_down_read(&c->mark_lock);
+	ret = __bch2_fs_usage_read_short(c);
+	percpu_up_read(&c->mark_lock);

 	return ret;
 }
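
The read path is now split in the usual kernel style: __bch2_fs_usage_read_short() assumes c->mark_lock is held, and bch2_fs_usage_read_short() is the locking wrapper. That lets bch2_recalc_sectors_available() below, which runs with mark_lock already held (not shown in these hunks), reuse the unlocked variant, and it makes capacity/used/free a self-consistent snapshot rather than the old unlocked percpu reads. A sketch of the convention (internal callers only, since the double-underscore variant is static):

	/* caller already holds mark_lock: use the unlocked variant */
	percpu_down_read(&c->mark_lock);
	free = __bch2_fs_usage_read_short(c).free;
	percpu_up_read(&c->mark_lock);

	/* everyone else: the wrapper takes the lock itself */
	free = bch2_fs_usage_read_short(c).free;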
@@ -257,7 +269,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
 			struct bch_fs_usage *fs_usage,
 			struct disk_reservation *disk_res)
 {
-	s64 added = fs_usage->s.data + fs_usage->s.reserved;
+	s64 added = fs_usage->data + fs_usage->reserved;
 	s64 should_not_have_added;
 	int ret = 0;
@@ -277,7 +289,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,

 	if (added > 0) {
 		disk_res->sectors -= added;
-		fs_usage->s.online_reserved -= added;
+		fs_usage->online_reserved -= added;
 	}

 	preempt_disable();
@@ -295,7 +307,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
 				  int nr, s64 size)
 {
 	if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
-		fs_usage->s.hidden += size;
+		fs_usage->hidden += size;

 	dev_usage->buckets[type] += nr;
 }
@@ -381,10 +393,10 @@ static inline void update_replicas(struct bch_fs *c,
 	BUG_ON(!sectors);

 	if (r->data_type == BCH_DATA_CACHED)
-		fs_usage->s.cached += sectors;
+		fs_usage->cached += sectors;
 	else
-		fs_usage->s.data += sectors;
-	fs_usage->data[idx] += sectors;
+		fs_usage->data += sectors;
+	fs_usage->replicas[idx] += sectors;
 }

 static inline void update_cached_sectors(struct bch_fs *c,
@@ -911,9 +923,9 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 				 fs_usage, journal_seq, flags, gc);
 	case KEY_TYPE_inode:
 		if (inserting)
-			fs_usage->s.nr_inodes++;
+			fs_usage->nr_inodes++;
 		else
-			fs_usage->s.nr_inodes--;
+			fs_usage->nr_inodes--;
 		return 0;
 	case KEY_TYPE_reservation: {
 		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -922,7 +934,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		replicas = clamp_t(unsigned, replicas, 1,
 				   ARRAY_SIZE(fs_usage->persistent_reserved));

-		fs_usage->s.reserved += sectors;
+		fs_usage->reserved += sectors;
 		fs_usage->persistent_reserved[replicas - 1] += sectors;
 		return 0;
 	}
@@ -1074,13 +1086,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
 {
 	percpu_u64_set(&c->pcpu->sectors_available, 0);

-	return avail_factor(bch2_fs_sectors_free(c));
+	return avail_factor(__bch2_fs_usage_read_short(c).free);
 }

 void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
 {
 	percpu_down_read(&c->mark_lock);
-	this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors);
+	this_cpu_sub(c->usage[0]->online_reserved, res->sectors);
 	percpu_up_read(&c->mark_lock);

 	res->sectors = 0;
@@ -1120,7 +1132,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
 out:
 	pcpu->sectors_available -= sectors;
-	this_cpu_add(c->usage[0]->s.online_reserved, sectors);
+	this_cpu_add(c->usage[0]->online_reserved, sectors);
 	res->sectors += sectors;

 	preempt_enable();
@@ -1136,7 +1148,7 @@ recalculate:
 	    (flags & BCH_DISK_RESERVATION_NOFAIL)) {
 		atomic64_set(&c->sectors_available,
 			     max_t(s64, 0, sectors_available - sectors));
-		this_cpu_add(c->usage[0]->s.online_reserved, sectors);
+		this_cpu_add(c->usage[0]->online_reserved, sectors);
 		res->sectors += sectors;
 		ret = 0;
 	} else {
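
avail_factor() scales by 2^RESERVE_FACTOR / (2^RESERVE_FACTOR + 1), holding back a small slice of the space reported as available for reservations. A standalone check of the arithmetic; RESERVE_FACTOR = 6 and the body of reserve_factor() are assumptions (they match the buckets code of this era but appear nowhere in this diff):

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	#define RESERVE_FACTOR 6			/* assumed value */

	static uint64_t reserve_factor(uint64_t r)	/* assumed: r * 65/64 */
	{
		return r + (r >> RESERVE_FACTOR);
	}

	static uint64_t avail_factor(uint64_t r)	/* from the diff: r * 64/65 */
	{
		return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
	}

	int main(void)
	{
		uint64_t r = 1 << 20;	/* 1M sectors = 512MB */

		/* avail_factor() approximately inverts reserve_factor() */
		printf("r=%" PRIu64 " padded=%" PRIu64 " back=%" PRIu64 "\n",
		       r, reserve_factor(r), avail_factor(reserve_factor(r)));
		return 0;
	}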

fs/bcachefs/buckets.h

@@ -225,18 +225,11 @@ static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)

 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);

-u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);

 struct bch_fs_usage_short
 bch2_fs_usage_read_short(struct bch_fs *);

-static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
-{
-	struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
-
-	return usage.capacity - usage.used;
-}
-
 /* key/bucket marking: */

 void bch2_bucket_seq_cleanup(struct bch_fs *);

fs/bcachefs/buckets_types.h

@@ -64,35 +64,33 @@ struct bch_dev_usage {
 struct bch_fs_usage {
 	/* all fields are in units of 512 byte sectors: */

-	/* summarized: */
-	struct bch_fs_usage_summarized {
-		u64		online_reserved;
+	u64		online_reserved;

-		/* fields after online_reserved are cleared/recalculated by gc: */
-		u64		gc_start[0];
+	/* fields after online_reserved are cleared/recalculated by gc: */
+	u64		gc_start[0];

-		u64		hidden;
-		u64		data;
-		u64		cached;
-		u64		reserved;
-		u64		nr_inodes;
+	u64		hidden;
+	u64		data;
+	u64		cached;
+	u64		reserved;
+	u64		nr_inodes;

-		/* XXX: add stats for compression ratio */
+	/* XXX: add stats for compression ratio */
 #if 0
-		u64		uncompressed;
-		u64		compressed;
+	u64		uncompressed;
+	u64		compressed;
 #endif
-	} s;

 	/* broken out: */
 	u64		persistent_reserved[BCH_REPLICAS_MAX];
-	u64		data[];
+	u64		replicas[];
 };

 struct bch_fs_usage_short {
 	u64		capacity;
 	u64		used;
+	u64		free;
 	u64		nr_inodes;
 };
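
The zero-length gc_start[0] member survives the flattening: it marks the offset from which GC clears and recomputes counters, and the flexible replicas[] tail now falls inside that region as well. A hedged sketch of how such a marker is typically used (the actual memset lives elsewhere in bcachefs; nr_u64s is a hypothetical total size in u64s, fixed fields plus replicas entries):

	/* zero everything from the marker to the end of the allocation,
	 * leaving online_reserved, which GC does not own, intact */
	memset(&usage->gc_start, 0,
	       nr_u64s * sizeof(u64) -
	       offsetof(struct bch_fs_usage, gc_start));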

fs/bcachefs/chardev.c

@@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
 	if (!src)
 		return -ENOMEM;

-	percpu_up_read(&c->mark_lock);
+	dst.used = bch2_fs_sectors_used(c, src);
+	dst.online_reserved = src->online_reserved;

-	dst.used = bch2_fs_sectors_used(c, *src);
-	dst.online_reserved = src->s.online_reserved;
+	percpu_up_read(&c->mark_lock);

 	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
 		dst.persistent_reserved[i] =
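
The switch to a pointer (paired with the buckets.h hunk above) also follows from the new layout: replicas[] is a flexible array member, so sizeof(struct bch_fs_usage) covers only the fixed fields and pass-by-value would silently slice off the per-replicas counters. A tiny standalone illustration of the hazard:

	#include <stdint.h>
	#include <stdio.h>

	struct usage {			/* model of the new layout */
		uint64_t online_reserved;
		uint64_t replicas[];	/* not counted by sizeof() */
	};

	int main(void)
	{
		/* pass-by-value copies only this many bytes, never the tail */
		printf("sizeof(struct usage) = %zu\n", sizeof(struct usage));
		return 0;
	}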

fs/bcachefs/journal.c

@@ -212,6 +212,9 @@ static int journal_entry_open(struct journal *j)
 	lockdep_assert_held(&j->lock);
 	BUG_ON(journal_entry_is_open(j));

+	if (j->blocked)
+		return -EAGAIN;
+
 	if (!fifo_free(&j->pin))
 		return 0;

@@ -287,7 +290,7 @@ static bool __journal_entry_close(struct journal *j)
 		spin_unlock(&j->lock);
 		fallthrough;
 	case JOURNAL_UNLOCKED:
-		return true;
+		return false;
 	}
 }

@@ -297,6 +300,22 @@ static bool journal_entry_close(struct journal *j)
 	return __journal_entry_close(j);
 }

+static bool journal_quiesced(struct journal *j)
+{
+	bool ret;
+
+	spin_lock(&j->lock);
+	ret = !j->reservations.prev_buf_unwritten &&
+	      !journal_entry_is_open(j);
+	__journal_entry_close(j);
+
+	return ret;
+}
+
+static void journal_quiesce(struct journal *j)
+{
+	wait_event(j->wait, journal_quiesced(j));
+}
+
 static void journal_write_work(struct work_struct *work)
 {
 	struct journal *j = container_of(work, struct journal, write_work.work);
@@ -722,6 +741,26 @@ int bch2_journal_flush(struct journal *j)
 	return bch2_journal_flush_seq(j, seq);
 }

+/* block/unlock the journal: */
+
+void bch2_journal_unblock(struct journal *j)
+{
+	spin_lock(&j->lock);
+	j->blocked--;
+	spin_unlock(&j->lock);
+
+	journal_wake(j);
+}
+
+void bch2_journal_block(struct journal *j)
+{
+	spin_lock(&j->lock);
+	j->blocked++;
+	spin_unlock(&j->lock);
+
+	journal_quiesce(j);
+}
+
 /* allocate journal on a device: */

 static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
@@ -931,8 +970,7 @@ void bch2_fs_journal_stop(struct journal *j)
 	    c->btree_roots_dirty)
 		bch2_journal_meta(j);

-	BUG_ON(journal_entry_is_open(j) ||
-	       j->reservations.prev_buf_unwritten);
+	journal_quiesce(j);

 	BUG_ON(!bch2_journal_error(j) &&
 	       test_bit(JOURNAL_NOT_EMPTY, &j->flags));
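
Taken together, this is the mechanism the commit title names: bch2_journal_block() bumps j->blocked and then waits, via journal_quiesce(), until no journal entry is open and the previous buffer's write has completed; journal_entry_open() refuses with -EAGAIN while blocked is nonzero; bch2_journal_unblock() drops the count and wakes the journal. Because blocked is a counter rather than a flag, block/unblock pairs nest. A hedged usage sketch (the caller and the work done under the block are illustrative, not from this commit):

	struct journal *j = &c->journal;

	bch2_journal_block(j);		/* returns with the journal quiesced */

	/*
	 * No new journal entry can be opened here and no journal write is
	 * in flight: safe to make whatever global change required blocking.
	 */

	bch2_journal_unblock(j);	/* drop the count, wake any waiters */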

fs/bcachefs/journal.h

@@ -370,6 +370,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 	set_bit(JOURNAL_REPLAY_DONE, &j->flags);
 }

+void bch2_journal_unblock(struct journal *);
+void bch2_journal_block(struct journal *);
+
 ssize_t bch2_journal_print_debug(struct journal *, char *);
 ssize_t bch2_journal_print_pins(struct journal *, char *);

fs/bcachefs/journal_types.h

@@ -142,6 +142,9 @@ struct journal {

 	spinlock_t		lock;

+	/* if nonzero, we may not open a new journal entry: */
+	unsigned		blocked;
+
 	/* Used when waiting because the journal was full */
 	wait_queue_head_t	wait;
 	struct closure_waitlist	async_wait;

fs/bcachefs/recovery.c

@@ -83,7 +83,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
 				  le64_to_cpu(u->v));
 		break;
 	case FS_USAGE_INODES:
-		percpu_u64_set(&c->usage[0]->s.nr_inodes,
+		percpu_u64_set(&c->usage[0]->nr_inodes,
 			       le64_to_cpu(u->v));
 		break;
 	case FS_USAGE_KEY_VERSION:

fs/bcachefs/replicas.c

@@ -245,14 +245,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
 	*dst = *src;

 	for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
-		if (!src->data[src_idx])
+		if (!src->replicas[src_idx])
 			continue;

 		dst_idx = __replicas_entry_idx(dst_r,
 				cpu_replicas_entry(src_r, src_idx));
 		BUG_ON(dst_idx < 0);

-		dst->data[dst_idx] = src->data[src_idx];
+		dst->replicas[dst_idx] = src->replicas[src_idx];
 	}
 }
@@ -457,7 +457,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
 		if (__replicas_has_entry(&c->replicas_gc, e))
 			continue;

-		v = percpu_u64_get(&c->usage[0]->data[i]);
+		v = percpu_u64_get(&c->usage[0]->replicas[i]);
 		if (!v)
 			continue;
@@ -558,7 +558,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
 		BUG_ON(ret < 0);
 	}

-	percpu_u64_set(&c->usage[0]->data[idx], sectors);
+	percpu_u64_set(&c->usage[0]->replicas[idx], sectors);

 	return 0;
 }

fs/bcachefs/super-io.c

@@ -930,7 +930,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
 	percpu_down_write(&c->mark_lock);

 	{
-		u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
+		u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
 		struct jset_entry_usage *u =
 			container_of(entry, struct jset_entry_usage, entry);
@@ -977,7 +977,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
 	for (i = 0; i < c->replicas.nr; i++) {
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(&c->replicas, i);
-		u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
+		u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
 		struct jset_entry_data_usage *u =
 			container_of(entry, struct jset_entry_data_usage, entry);

fs/bcachefs/sysfs.c

@@ -244,17 +244,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 	pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
 	pr_buf(&out, "hidden:\t\t\t\t%llu\n",
-	       fs_usage->s.hidden);
+	       fs_usage->hidden);
 	pr_buf(&out, "data:\t\t\t\t%llu\n",
-	       fs_usage->s.data);
+	       fs_usage->data);
 	pr_buf(&out, "cached:\t\t\t\t%llu\n",
-	       fs_usage->s.cached);
+	       fs_usage->cached);
 	pr_buf(&out, "reserved:\t\t\t%llu\n",
-	       fs_usage->s.reserved);
+	       fs_usage->reserved);
 	pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
-	       fs_usage->s.nr_inodes);
+	       fs_usage->nr_inodes);
 	pr_buf(&out, "online reserved:\t\t%llu\n",
-	       fs_usage->s.online_reserved);
+	       fs_usage->online_reserved);

 	for (i = 0;
 	     i < ARRAY_SIZE(fs_usage->persistent_reserved);
@@ -270,7 +270,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 		pr_buf(&out, "\t");
 		bch2_replicas_entry_to_text(&out, e);
-		pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
+		pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
 	}

 	percpu_up_read(&c->mark_lock);