From b9fa375bab2786d0d2c5435b5e3fceaf6594aaf3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 Mar 2023 20:38:46 -0500 Subject: [PATCH] bcachefs: bch2_fs_moving_ctxts_to_text() This also adds bch2_write_op_to_text(): now we can see outstanding moves, useful for debugging shutdown with the upcoming BCH_WRITE_WAIT_FOR_EC and likely for other things in the future. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 16 +++++-- fs/bcachefs/io.c | 28 +++++++++++ fs/bcachefs/io.h | 61 +++++++++++------------- fs/bcachefs/move.c | 103 ++++++++++++++++++++++++++++++++++++++--- fs/bcachefs/move.h | 9 ++++ fs/bcachefs/super.c | 3 +- fs/bcachefs/sysfs.c | 26 +++-------- 7 files changed, 179 insertions(+), 67 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index baaa4cd3caa7..8be65ebb34ad 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -214,8 +214,11 @@ #define BCH_WRITE_REF_DEBUG #endif +#ifndef dynamic_fault #define dynamic_fault(...) 0 -#define race_fault(...) 0 +#endif + +#define race_fault(...) dynamic_fault("bcachefs:race") #define trace_and_count(_c, _name, ...) 
\ do { \ @@ -922,6 +925,13 @@ struct bch_fs { mempool_t large_bkey_pool; + /* MOVE.C */ + struct list_head moving_context_list; + struct mutex moving_context_lock; + + struct list_head data_progress_list; + struct mutex data_progress_lock; + /* REBALANCE */ struct bch_fs_rebalance rebalance; @@ -932,10 +942,6 @@ struct bch_fs { bool copygc_running; wait_queue_head_t copygc_running_wq; - /* DATA PROGRESS STATS */ - struct list_head data_progress_list; - struct mutex data_progress_lock; - /* STRIPES: */ GENRADIX(struct stripe) stripes; GENRADIX(struct gc_stripe) gc_stripes; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 6daf5f4a905c..1b093650ff9a 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1870,6 +1870,34 @@ err: op->end_io(op); } +const char * const bch2_write_flags[] = { +#define x(f) #f, + BCH_WRITE_FLAGS() +#undef x + NULL +}; + +void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) +{ + prt_str(out, "pos: "); + bch2_bpos_to_text(out, op->pos); + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_str(out, "started: "); + bch2_pr_time_units(out, local_clock() - op->start_time); + prt_newline(out); + + prt_str(out, "flags: "); + prt_bitflags(out, bch2_write_flags, op->flags); + prt_newline(out); + + prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl)); + prt_newline(out); + + printbuf_indent_sub(out, 2); +} + /* Cache promotion on read */ struct promote_op { diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index 77a4a1cef71c..87d80fb28c05 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -28,41 +28,34 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, const char *bch2_blk_status_to_str(blk_status_t); -enum bch_write_flags { - __BCH_WRITE_ALLOC_NOWAIT, - __BCH_WRITE_CACHED, - __BCH_WRITE_DATA_ENCODED, - __BCH_WRITE_PAGES_STABLE, - __BCH_WRITE_PAGES_OWNED, - __BCH_WRITE_ONLY_SPECIFIED_DEVS, - __BCH_WRITE_WROTE_DATA_INLINE, - __BCH_WRITE_FROM_INTERNAL, - __BCH_WRITE_CHECK_ENOSPC, - 
__BCH_WRITE_SYNC, - __BCH_WRITE_MOVE, - __BCH_WRITE_IN_WORKER, - __BCH_WRITE_DONE, - __BCH_WRITE_IO_ERROR, - __BCH_WRITE_CONVERT_UNWRITTEN, +#define BCH_WRITE_FLAGS() \ + x(ALLOC_NOWAIT) \ + x(CACHED) \ + x(DATA_ENCODED) \ + x(PAGES_STABLE) \ + x(PAGES_OWNED) \ + x(ONLY_SPECIFIED_DEVS) \ + x(WROTE_DATA_INLINE) \ + x(FROM_INTERNAL) \ + x(CHECK_ENOSPC) \ + x(SYNC) \ + x(MOVE) \ + x(IN_WORKER) \ + x(DONE) \ + x(IO_ERROR) \ + x(CONVERT_UNWRITTEN) + +enum __bch_write_flags { +#define x(f) __BCH_WRITE_##f, + BCH_WRITE_FLAGS() +#undef x }; -#define BCH_WRITE_ALLOC_NOWAIT (1U << __BCH_WRITE_ALLOC_NOWAIT) -#define BCH_WRITE_CACHED (1U << __BCH_WRITE_CACHED) -#define BCH_WRITE_DATA_ENCODED (1U << __BCH_WRITE_DATA_ENCODED) -#define BCH_WRITE_PAGES_STABLE (1U << __BCH_WRITE_PAGES_STABLE) -#define BCH_WRITE_PAGES_OWNED (1U << __BCH_WRITE_PAGES_OWNED) -#define BCH_WRITE_ONLY_SPECIFIED_DEVS (1U << __BCH_WRITE_ONLY_SPECIFIED_DEVS) -#define BCH_WRITE_WROTE_DATA_INLINE (1U << __BCH_WRITE_WROTE_DATA_INLINE) -#define BCH_WRITE_FROM_INTERNAL (1U << __BCH_WRITE_FROM_INTERNAL) -#define BCH_WRITE_CHECK_ENOSPC (1U << __BCH_WRITE_CHECK_ENOSPC) -#define BCH_WRITE_SYNC (1U << __BCH_WRITE_SYNC) -#define BCH_WRITE_MOVE (1U << __BCH_WRITE_MOVE) - -/* Internal: */ -#define BCH_WRITE_IN_WORKER (1U << __BCH_WRITE_IN_WORKER) -#define BCH_WRITE_DONE (1U << __BCH_WRITE_DONE) -#define BCH_WRITE_IO_ERROR (1U << __BCH_WRITE_IO_ERROR) -#define BCH_WRITE_CONVERT_UNWRITTEN (1U << __BCH_WRITE_CONVERT_UNWRITTEN) +enum bch_write_flags { +#define x(f) BCH_WRITE_##f = 1U << __BCH_WRITE_##f, + BCH_WRITE_FLAGS() +#undef x +}; static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) { @@ -124,6 +117,8 @@ static inline struct bch_write_bio *wbio_init(struct bio *bio) return wbio; } +void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *); + struct bch_devs_mask; struct cache_promote_op; struct extent_ptr_decoded; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 
de10f388b8d3..f74ef947cac5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -40,7 +40,8 @@ static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats) } struct moving_io { - struct list_head list; + struct list_head read_list; + struct list_head io_list; struct move_bucket_in_flight *b; struct closure cl; bool read_completed; @@ -64,7 +65,12 @@ static void move_free(struct moving_io *io) atomic_dec(&io->b->count); bch2_data_update_exit(&io->write); + + mutex_lock(&ctxt->lock); + list_del(&io->io_list); wake_up(&ctxt->wait); + mutex_unlock(&ctxt->lock); + bch2_write_ref_put(c, BCH_WRITE_REF_move); kfree(io); } @@ -100,7 +106,7 @@ static void move_write(struct moving_io *io) struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt) { struct moving_io *io = - list_first_entry_or_null(&ctxt->reads, struct moving_io, list); + list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list); return io && io->read_completed ? io : NULL; } @@ -127,7 +133,7 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt, bch2_trans_unlock(trans); while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { - list_del(&io->list); + list_del(&io->read_list); move_write(io); } } @@ -144,6 +150,8 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt, void bch2_moving_ctxt_exit(struct moving_context *ctxt) { + struct bch_fs *c = ctxt->c; + move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); closure_sync(&ctxt->cl); @@ -153,12 +161,15 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) EBUG_ON(atomic_read(&ctxt->read_ios)); if (ctxt->stats) { - progress_list_del(ctxt->c, ctxt->stats); - - trace_move_data(ctxt->c, + progress_list_del(c, ctxt->stats); + trace_move_data(c, atomic64_read(&ctxt->stats->sectors_moved), atomic64_read(&ctxt->stats->keys_moved)); } + + mutex_lock(&c->moving_context_lock); + list_del(&ctxt->list); + mutex_unlock(&c->moving_context_lock); } void 
bch2_moving_ctxt_init(struct moving_context *ctxt, @@ -171,15 +182,23 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, memset(ctxt, 0, sizeof(*ctxt)); ctxt->c = c; + ctxt->fn = (void *) _RET_IP_; ctxt->rate = rate; ctxt->stats = stats; ctxt->wp = wp; ctxt->wait_on_copygc = wait_on_copygc; closure_init_stack(&ctxt->cl); + + mutex_init(&ctxt->lock); INIT_LIST_HEAD(&ctxt->reads); + INIT_LIST_HEAD(&ctxt->ios); init_waitqueue_head(&ctxt->wait); + mutex_lock(&c->moving_context_lock); + list_add(&ctxt->list, &c->moving_context_list); + mutex_unlock(&c->moving_context_lock); + if (stats) { progress_list_add(c, stats); stats->data_type = BCH_DATA_user; @@ -280,6 +299,7 @@ static int bch2_move_extent(struct btree_trans *trans, if (!io) goto err; + INIT_LIST_HEAD(&io->io_list); io->write.ctxt = ctxt; io->read_sectors = k.k->size; io->write_sectors = k.k->size; @@ -333,9 +353,14 @@ static int bch2_move_extent(struct btree_trans *trans, this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); trace_move_extent_read(k.k); + + mutex_lock(&ctxt->lock); atomic_add(io->read_sectors, &ctxt->read_sectors); atomic_inc(&ctxt->read_ios); - list_add_tail(&io->list, &ctxt->reads); + + list_add_tail(&io->read_list, &ctxt->reads); + list_add_tail(&io->io_list, &ctxt->ios); + mutex_unlock(&ctxt->lock); /* * dropped by move_read_endio() - guards against use after free of @@ -1107,3 +1132,67 @@ int bch2_data_job(struct bch_fs *c, return ret; } + +void bch2_data_jobs_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct bch_move_stats *stats; + + mutex_lock(&c->data_progress_lock); + list_for_each_entry(stats, &c->data_progress_list, list) { + prt_printf(out, "%s: data type %s btree_id %s position: ", + stats->name, + bch2_data_types[stats->data_type], + bch2_btree_ids[stats->btree_id]); + bch2_bpos_to_text(out, stats->pos); + prt_printf(out, "%s", "\n"); + } + mutex_unlock(&c->data_progress_lock); +} + +static void bch2_moving_ctxt_to_text(struct printbuf *out, struct 
moving_context *ctxt) +{ + struct moving_io *io; + + prt_printf(out, "%ps:", ctxt->fn); + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_printf(out, "reads: %u sectors %u", + atomic_read(&ctxt->read_ios), + atomic_read(&ctxt->read_sectors)); + prt_newline(out); + + prt_printf(out, "writes: %u sectors %u", + atomic_read(&ctxt->write_ios), + atomic_read(&ctxt->write_sectors)); + prt_newline(out); + + printbuf_indent_add(out, 2); + + mutex_lock(&ctxt->lock); + list_for_each_entry(io, &ctxt->ios, io_list) { + bch2_write_op_to_text(out, &io->write.op); + } + mutex_unlock(&ctxt->lock); + + printbuf_indent_sub(out, 4); +} + +void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct moving_context *ctxt; + + mutex_lock(&c->moving_context_lock); + list_for_each_entry(ctxt, &c->moving_context_list, list) + bch2_moving_ctxt_to_text(out, ctxt); + mutex_unlock(&c->moving_context_lock); +} + +void bch2_fs_move_init(struct bch_fs *c) +{ + INIT_LIST_HEAD(&c->moving_context_list); + mutex_init(&c->moving_context_lock); + + INIT_LIST_HEAD(&c->data_progress_list); + mutex_init(&c->data_progress_lock); +} diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 4c0013872347..50a6f7d7a292 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -11,6 +11,9 @@ struct bch_read_bio; struct moving_context { struct bch_fs *c; + struct list_head list; + void *fn; + struct bch_ratelimit *rate; struct bch_move_stats *stats; struct write_point_specifier wp; @@ -19,7 +22,10 @@ struct moving_context { /* For waiting on outstanding reads and writes: */ struct closure cl; + + struct mutex lock; struct list_head reads; + struct list_head ios; /* in flight sectors: */ atomic_t read_sectors; @@ -84,6 +90,9 @@ int bch2_data_job(struct bch_fs *, struct bch_ioctl_data); void bch2_move_stats_init(struct bch_move_stats *stats, char *name); +void bch2_data_jobs_to_text(struct printbuf *, struct bch_fs *); +void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct 
bch_fs *); +void bch2_fs_move_init(struct bch_fs *); #endif /* _BCACHEFS_MOVE_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7f7beed1e062..bf3aabdb0fc9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -681,6 +681,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_rebalance_init(c); bch2_fs_quota_init(c); bch2_fs_ec_init_early(c); + bch2_fs_move_init(c); INIT_LIST_HEAD(&c->list); @@ -705,8 +706,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->ec_stripe_new_list); mutex_init(&c->ec_stripe_new_lock); - INIT_LIST_HEAD(&c->data_progress_list); - mutex_init(&c->data_progress_lock); mutex_init(&c->ec_stripes_heap_lock); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e3a166f79cb6..6be6be881dbd 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -248,6 +248,7 @@ read_attribute(io_timers_read); read_attribute(io_timers_write); read_attribute(data_jobs); +read_attribute(moving_ctxts); #ifdef CONFIG_BCACHEFS_TESTS write_attribute(perf_test); @@ -277,25 +278,6 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) return ret; } -static long data_progress_to_text(struct printbuf *out, struct bch_fs *c) -{ - long ret = 0; - struct bch_move_stats *stats; - - mutex_lock(&c->data_progress_lock); - list_for_each_entry(stats, &c->data_progress_list, list) { - prt_printf(out, "%s: data type %s btree_id %s position: ", - stats->name, - bch2_data_types[stats->data_type], - bch2_btree_ids[stats->btree_id]); - bch2_bpos_to_text(out, stats->pos); - prt_printf(out, "%s", "\n"); - } - - mutex_unlock(&c->data_progress_lock); - return ret; -} - static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_trans trans; @@ -476,7 +458,10 @@ SHOW(bch2_fs) bch2_io_timers_to_text(out, &c->io_clock[WRITE]); if (attr == &sysfs_data_jobs) - data_progress_to_text(out, c); + bch2_data_jobs_to_text(out, c); + + if (attr == 
&sysfs_moving_ctxts) + bch2_fs_moving_ctxts_to_text(out, c); #ifdef BCH_WRITE_REF_DEBUG if (attr == &sysfs_write_refs) @@ -693,6 +678,7 @@ struct attribute *bch2_fs_internal_files[] = { sysfs_pd_controller_files(rebalance), &sysfs_data_jobs, + &sysfs_moving_ctxts, &sysfs_internal_uuid, NULL