nvme-multipath: avoid hang on inaccessible namespaces
BugLink: https://bugs.launchpad.net/bugs/2101915 [ Upstream commit 3b97f5a05cfc55e7729ff3769f63eef64e2178bb ] During repetitive namespace remapping operations on the target the namespace might have changed between the time the initial scan was performed, and partition scan was invoked by device_add_disk() in nvme_mpath_set_live(). We then end up with a stuck scanning process: [<0>] folio_wait_bit_common+0x12a/0x310 [<0>] filemap_read_folio+0x97/0xd0 [<0>] do_read_cache_folio+0x108/0x390 [<0>] read_part_sector+0x31/0xa0 [<0>] read_lba+0xc5/0x160 [<0>] efi_partition+0xd9/0x8f0 [<0>] bdev_disk_changed+0x23d/0x6d0 [<0>] blkdev_get_whole+0x78/0xc0 [<0>] bdev_open+0x2c6/0x3b0 [<0>] bdev_file_open_by_dev+0xcb/0x120 [<0>] disk_scan_partitions+0x5d/0x100 [<0>] device_add_disk+0x402/0x420 [<0>] nvme_mpath_set_live+0x4f/0x1f0 [nvme_core] [<0>] nvme_mpath_add_disk+0x107/0x120 [nvme_core] [<0>] nvme_alloc_ns+0xac6/0xe60 [nvme_core] [<0>] nvme_scan_ns+0x2dd/0x3e0 [nvme_core] [<0>] nvme_scan_work+0x1a3/0x490 [nvme_core] This happens when we have several paths, some of which are inaccessible, and the active paths are removed first. Then nvme_find_path() will requeue I/O in the ns_head (as paths are present), but the requeue list is never triggered as all remaining paths are inactive. This patch checks for NVME_NSHEAD_DISK_LIVE in nvme_available_path(), and requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared once the last path has been removed to properly terminate pending I/O. Signed-off-by: Hannes Reinecke <hare@kernel.org> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Keith Busch <kbusch@kernel.org> Stable-dep-of: 5dd18f09ce73 ("nvme/multipath: Fix RCU list traversal to use SRCU primitive") Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Koichiro Den <koichiro.den@canonical.com> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
This commit is contained in:
committed by
Stefan Bader
parent
cf34ad7267
commit
953f250fd6
@@ -421,6 +421,9 @@ static bool nvme_available_path(struct nvme_ns_head *head)
|
||||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry_rcu(ns, &head->list, siblings) {
|
||||
if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
|
||||
continue;
|
||||
@@ -997,8 +1000,7 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
|
||||
{
|
||||
if (!head->disk)
|
||||
return;
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
|
||||
if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
|
||||
nvme_cdev_del(&head->cdev, &head->cdev_device);
|
||||
/*
|
||||
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
|
||||
@@ -1008,6 +1010,12 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
del_gendisk(head->disk);
|
||||
}
|
||||
/*
|
||||
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
|
||||
* to allow multipath to fail all I/O.
|
||||
*/
|
||||
synchronize_srcu(&head->srcu);
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
}
|
||||
|
||||
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
|
||||
Reference in New Issue
Block a user