bcachefs: use dedicated workqueue for tasks holding write refs
A workqueue resource deadlock has been observed when running fsck on a filesystem with a full/stuck journal. fsck is not currently able to repair the fs due to a fairly rapid emergency shutdown, but rather than exit gracefully the fsck process hangs during the shutdown sequence. Fortunately this is easily recoverable from userspace, but the root cause involves code shared between the kernel and userspace and so should be addressed.

The deadlock scenario involves the main task in the bch2_fs_stop() -> bch2_fs_read_only() path waiting on write references to drain with the fs state lock held. A bch2_read_only_work() workqueue task is scheduled on the system_long_wq, blocked on the state lock. Finally, various other write-ref-holding workqueue tasks are scheduled to run on the same workqueue and must complete in order to release references that the initial task is waiting on.

To avoid this problem, we can split the dependent workqueue tasks across different workqueues. It's a bit of a waste to create a dedicated wq for the read-only worker, but there are several tasks throughout the fs that follow the pattern of acquiring a write reference and then scheduling to the system wq. Use a local wq for such tasks to break the subtle dependency between these and the read-only worker.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
76c70c57f0
commit
8bff9875a6
@ -1760,7 +1760,7 @@ static void bch2_do_discards_work(struct work_struct *work)
|
||||
void bch2_do_discards(struct bch_fs *c)
|
||||
{
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
|
||||
!queue_work(system_long_wq, &c->discard_work))
|
||||
!queue_work(c->write_ref_wq, &c->discard_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
|
||||
}
|
||||
|
||||
@ -1886,7 +1886,7 @@ err:
|
||||
void bch2_do_invalidates(struct bch_fs *c)
|
||||
{
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
|
||||
!queue_work(system_long_wq, &c->invalidate_work))
|
||||
!queue_work(c->write_ref_wq, &c->invalidate_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
|
||||
}
|
||||
|
||||
|
@ -808,6 +808,12 @@ struct bch_fs {
|
||||
struct workqueue_struct *btree_io_complete_wq;
|
||||
/* copygc needs its own workqueue for index updates.. */
|
||||
struct workqueue_struct *copygc_wq;
|
||||
/*
|
||||
* Use a dedicated wq for write ref holder tasks. Required to avoid
|
||||
* dependency problems with other wq tasks that can block on ref
|
||||
* draining, such as read-only transition.
|
||||
*/
|
||||
struct workqueue_struct *write_ref_wq;
|
||||
|
||||
/* ALLOCATION */
|
||||
struct bch_devs_mask rw_devs[BCH_DATA_NR];
|
||||
|
@ -826,7 +826,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
|
||||
void bch2_do_stripe_deletes(struct bch_fs *c)
|
||||
{
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
|
||||
!schedule_work(&c->ec_stripe_delete_work))
|
||||
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
|
||||
}
|
||||
|
||||
|
@ -714,7 +714,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
|
||||
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
|
||||
{
|
||||
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
|
||||
!queue_work(system_long_wq, &c->snapshot_delete_work))
|
||||
!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
|
||||
}
|
||||
|
||||
@ -926,7 +926,7 @@ int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
|
||||
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
|
||||
return -EROFS;
|
||||
|
||||
if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
|
||||
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
|
||||
bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
|
||||
return 0;
|
||||
}
|
||||
|
@ -493,6 +493,8 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
kfree(c->journal_seq_blacklist_table);
|
||||
kfree(c->unused_inode_hints);
|
||||
|
||||
if (c->write_ref_wq)
|
||||
destroy_workqueue(c->write_ref_wq);
|
||||
if (c->io_complete_wq)
|
||||
destroy_workqueue(c->io_complete_wq);
|
||||
if (c->copygc_wq)
|
||||
@ -787,6 +789,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
|
||||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io",
|
||||
WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
|
||||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
|
||||
WQ_FREEZABLE, 0)) ||
|
||||
#ifndef BCH_WRITE_REF_DEBUG
|
||||
percpu_ref_init(&c->writes, bch2_writes_disabled,
|
||||
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
|
||||
|
Loading…
Reference in New Issue
Block a user