1

bcachefs: bch2_print_allocator_stuck()

If we block on the allocator for more than 10 seconds, print out some
useful debugging info.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-05-03 14:49:23 -04:00
parent 4c5b7294de
commit e98786ea85
5 changed files with 114 additions and 76 deletions

View File

@ -1630,3 +1630,104 @@ void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
prt_str(out, "Btree write point\n"); prt_str(out, "Btree write point\n");
bch2_write_point_to_text(out, c, &c->btree_write_point); bch2_write_point_to_text(out, c, &c->btree_write_point);
} }
void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
unsigned nr[BCH_DATA_NR];
memset(nr, 0, sizeof(nr));
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
prt_printf(out, "hidden\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.hidden));
prt_printf(out, "btree\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.btree));
prt_printf(out, "data\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.data));
prt_printf(out, "cached\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.cached));
prt_printf(out, "reserved\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.reserved));
prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved));
prt_printf(out, "nr_inodes\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes));
percpu_up_read(&c->mark_lock);
prt_newline(out);
prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
prt_printf(out, "open buckets total\t%u\n", OPEN_BUCKETS_COUNT);
prt_printf(out, "open_buckets_wait\t%s\n", c->open_buckets_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open_buckets_btree\t%u\n", nr[BCH_DATA_btree]);
prt_printf(out, "open_buckets_user\t%u\n", nr[BCH_DATA_user]);
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache_nr);
}
void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
struct bch_dev_usage stats = bch2_dev_usage_read(ca);
unsigned nr[BCH_DATA_NR];
memset(nr, 0, sizeof(nr));
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
bch2_dev_usage_to_text(out, &stats);
prt_newline(out);
prt_printf(out, "reserves:\n");
for (unsigned i = 0; i < BCH_WATERMARK_NR; i++)
prt_printf(out, "%s\t%llu\r\n", bch2_watermarks[i], bch2_dev_buckets_reserved(ca, i));
prt_newline(out);
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
prt_printf(out, "open buckets\t%i\r\n", ca->nr_open_buckets);
prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats));
}
void bch2_print_allocator_stuck(struct bch_fs *c)
{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
prt_printf(&buf, "Allocator debug:\n");
printbuf_indent_add(&buf, 2);
bch2_fs_alloc_debug_to_text(&buf, c);
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
for_each_online_member(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
printbuf_indent_add(&buf, 2);
bch2_dev_alloc_debug_to_text(&buf, ca);
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
}
prt_printf(&buf, "Copygc debug:\n");
printbuf_indent_add(&buf, 2);
bch2_copygc_wait_to_text(&buf, c);
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
prt_printf(&buf, "Journal debug:\n");
printbuf_indent_add(&buf, 2);
bch2_journal_debug_to_text(&buf, &c->journal);
printbuf_indent_sub(&buf, 2);
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}

View File

@ -222,4 +222,9 @@ void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
void bch2_print_allocator_stuck(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */

View File

@ -1494,7 +1494,11 @@ err:
if ((op->flags & BCH_WRITE_SYNC) || if ((op->flags & BCH_WRITE_SYNC) ||
(!(op->flags & BCH_WRITE_DONE) && (!(op->flags & BCH_WRITE_DONE) &&
!(op->flags & BCH_WRITE_IN_WORKER))) { !(op->flags & BCH_WRITE_IN_WORKER))) {
closure_sync(&op->cl); if (closure_sync_timeout(&op->cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&op->cl);
}
__bch2_write_index(op); __bch2_write_index(op);
if (!(op->flags & BCH_WRITE_DONE)) if (!(op->flags & BCH_WRITE_DONE))

View File

@ -1416,7 +1416,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
if (!out->nr_tabstops) if (!out->nr_tabstops)
printbuf_tabstop_push(out, 24); printbuf_tabstop_push(out, 28);
out->atomic++; out->atomic++;
rcu_read_lock(); rcu_read_lock();

View File

@ -346,37 +346,6 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
prt_printf(out, "\n"); prt_printf(out, "\n");
} }
static void fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
{
unsigned nr[BCH_DATA_NR];
memset(nr, 0, sizeof(nr));
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
prt_printf(out, "hidden\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.hidden));
prt_printf(out, "btree\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.btree));
prt_printf(out, "data\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.data));
prt_printf(out, "cached\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.cached));
prt_printf(out, "reserved\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.reserved));
prt_printf(out, "online_reserved\t%llu\n", percpu_u64_get(c->online_reserved));
prt_printf(out, "nr_inodes\t%llu\n", bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes));
percpu_up_read(&c->mark_lock);
prt_newline(out);
prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
prt_printf(out, "open buckets total\t%u\n", OPEN_BUCKETS_COUNT);
prt_printf(out, "open_buckets_wait\t%s\n", c->open_buckets_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open_buckets_btree\t%u\n", nr[BCH_DATA_btree]);
prt_printf(out, "open_buckets_user\t%u\n", nr[BCH_DATA_user]);
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache_nr);
}
SHOW(bch2_fs) SHOW(bch2_fs)
{ {
struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
@ -458,7 +427,7 @@ SHOW(bch2_fs)
bch2_disk_groups_to_text(out, c); bch2_disk_groups_to_text(out, c);
if (attr == &sysfs_alloc_debug) if (attr == &sysfs_alloc_debug)
fs_alloc_debug_to_text(out, c); bch2_fs_alloc_debug_to_text(out, c);
return 0; return 0;
} }
@ -779,47 +748,6 @@ struct attribute *bch2_fs_time_stats_files[] = {
NULL NULL
}; };
static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
struct bch_dev_usage stats = bch2_dev_usage_read(ca);
unsigned nr[BCH_DATA_NR];
memset(nr, 0, sizeof(nr));
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
printbuf_tabstop_push(out, 8);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
bch2_dev_usage_to_text(out, &stats);
prt_newline(out);
prt_printf(out, "reserves:\n");
for (unsigned i = 0; i < BCH_WATERMARK_NR; i++)
prt_printf(out, "%s\t%llu\r\n", bch2_watermarks[i], bch2_dev_buckets_reserved(ca, i));
prt_newline(out);
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
prt_printf(out, "freelist_wait\t%s\n", c->freelist_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open buckets allocated\t%i\n", OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
prt_printf(out, "open buckets this dev\t%i\n", ca->nr_open_buckets);
prt_printf(out, "open buckets total\t%u\n", OPEN_BUCKETS_COUNT);
prt_printf(out, "open_buckets_wait\t%s\n", c->open_buckets_wait.list.first ? "waiting" : "empty");
prt_printf(out, "open_buckets_btree\t%u\n", nr[BCH_DATA_btree]);
prt_printf(out, "open_buckets_user\t%u\n", nr[BCH_DATA_user]);
prt_printf(out, "buckets_to_invalidate\t%llu\n", should_invalidate_buckets(ca, stats));
prt_printf(out, "btree reserve cache\t%u\n", c->btree_reserve_cache_nr);
}
static const char * const bch2_rw[] = { static const char * const bch2_rw[] = {
"read", "read",
"write", "write",
@ -889,7 +817,7 @@ SHOW(bch2_dev)
* 100 / CONGESTED_MAX); * 100 / CONGESTED_MAX);
if (attr == &sysfs_alloc_debug) if (attr == &sysfs_alloc_debug)
dev_alloc_debug_to_text(out, ca); bch2_dev_alloc_debug_to_text(out, ca);
return 0; return 0;
} }