nvme-multipath: avoid hang on inaccessible namespaces
During repetitive namespace remapping operations on the target the namespace might have changed between the time the initial scan was performed, and partition scan was invoked by device_add_disk() in nvme_mpath_set_live(). We then end up with a stuck scanning process: [<0>] folio_wait_bit_common+0x12a/0x310 [<0>] filemap_read_folio+0x97/0xd0 [<0>] do_read_cache_folio+0x108/0x390 [<0>] read_part_sector+0x31/0xa0 [<0>] read_lba+0xc5/0x160 [<0>] efi_partition+0xd9/0x8f0 [<0>] bdev_disk_changed+0x23d/0x6d0 [<0>] blkdev_get_whole+0x78/0xc0 [<0>] bdev_open+0x2c6/0x3b0 [<0>] bdev_file_open_by_dev+0xcb/0x120 [<0>] disk_scan_partitions+0x5d/0x100 [<0>] device_add_disk+0x402/0x420 [<0>] nvme_mpath_set_live+0x4f/0x1f0 [nvme_core] [<0>] nvme_mpath_add_disk+0x107/0x120 [nvme_core] [<0>] nvme_alloc_ns+0xac6/0xe60 [nvme_core] [<0>] nvme_scan_ns+0x2dd/0x3e0 [nvme_core] [<0>] nvme_scan_work+0x1a3/0x490 [nvme_core] This happens when we have several paths, some of which are inaccessible, and the active paths are removed first. Then nvme_find_path() will requeue I/O in the ns_head (as paths are present), but the requeue list is never triggered as all remaining paths are inactive. This patch checks for NVME_NSHEAD_DISK_LIVE in nvme_available_path(), and requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared once the last path has been removed to properly terminate pending I/O. Signed-off-by: Hannes Reinecke <hare@kernel.org> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Keith Busch <kbusch@kernel.org>
This commit is contained in:
parent
63bcf9014e
commit
3b97f5a05c
@ -421,6 +421,9 @@ static bool nvme_available_path(struct nvme_ns_head *head)
|
||||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry_rcu(ns, &head->list, siblings) {
|
||||
if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
|
||||
continue;
|
||||
@ -969,11 +972,16 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
|
||||
{
|
||||
if (!head->disk)
|
||||
return;
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
|
||||
if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
|
||||
nvme_cdev_del(&head->cdev, &head->cdev_device);
|
||||
del_gendisk(head->disk);
|
||||
}
|
||||
/*
|
||||
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
|
||||
* to allow multipath to fail all I/O.
|
||||
*/
|
||||
synchronize_srcu(&head->srcu);
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
}
|
||||
|
||||
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
|
Loading…
Reference in New Issue
Block a user