nfs/blocklayout: Limit repeat device registration on failure
[ Upstream commit 614733f9441ed53bb442d4734112ec1e24bd6da7 ]
Every pNFS SCSI IO wants to do LAYOUTGET, then within the layout find the
device which can drive GETDEVINFO, then finally may need to prep the device
with a reservation. This slow work makes a mess of IO latencies if one of
the later steps is going to fail for awhile.
If we're unable to register a SCSI device, ensure we mark the device as
unavailable so that it will timeout and be re-added via GETDEVINFO. This
avoids repeated doomed attempts to register a device in the IO path.
Add some clarifying comments as well.
Fixes: d869da91cc ("nfs/blocklayout: Fix premature PR key unregistration")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
faa4bacfae
commit
bff8460376
@@ -571,19 +571,32 @@ retry:
|
||||
if (!node)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
/*
|
||||
* Devices that are marked unavailable are left in the cache with a
|
||||
* timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
|
||||
* constantly attempting to register the device. Once marked as
|
||||
* unavailable they must be deleted and never reused.
|
||||
*/
|
||||
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
|
||||
unsigned long end = jiffies;
|
||||
unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
|
||||
|
||||
if (!time_in_range(node->timestamp_unavailable, start, end)) {
|
||||
/* Uncork subsequent GETDEVINFO operations for this device */
|
||||
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
|
||||
goto retry;
|
||||
}
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node)))
|
||||
if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
|
||||
/*
|
||||
* If we cannot register, treat this device as transient:
|
||||
* Make a negative cache entry for the device
|
||||
*/
|
||||
nfs4_mark_deviceid_unavailable(node);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
return node;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user