NVMe-oF: Disconnect subsystems

Current code doesn't disconnect NVMe-oF subsystems when doing a
disconnect_volume or on connect_volume failure.

This is very problematic for systems that don't share subsytems for
multiple namespaces, because both the device (i.e., /dev/nvme0n1) and
the subsystem (i.e., /dev/nvme0) will stay forever (now that we connect
with the controller loss timeout set to infinite, before it was for 10
minutes) in the system (until manually removed) while the host keeps
trying to connect to the remote subsystem, but it won't be able to
connect because in this case drivers usually destroy both the namespace
and the subsystem simultaneously (so there's no AER message to indicate
the change in available namespaces within the subsystem).

We'll experience multiple issues with all these leftover devices, such
as an ever increasing number of kernel log messages with the connection
retries, possible exhaustion of number of connected NVMe subsystems
and/or files in /dev, and so on.

This patch makes sure the nvmeof connector disconnects a subsystem when
there is no longer a namespace present or when the only namespace
present is the one we are disconnecting. This is done both on the
disconnect_volume call as well as failures during connect_volume.

This is not a full solution to the problem of leaving leftover devices,
because for drivers that share the subsystem there are race conditions
between unexport/unmap of volumes on the cinder side and os-brick
disconnect_volume calls.  To fully prevent this situation Cinder needs
to start reporting the shared_targets value for NVMe volumes (something
it's already doing for iSCSI).

Partial-Bug: #1961102
Change-Id: Ia00be53420307d6ac1f100420d039da7b65dc349
This commit is contained in:
Gorka Eguileor 2022-04-29 12:44:32 +02:00
parent 4c21b40df2
commit 05a4c05c14
3 changed files with 315 additions and 36 deletions

View File

@ -15,7 +15,7 @@
"""Exceptions for the Brick library."""
import traceback
from typing import Iterable, List, Optional # noqa: H301
from typing import Any, List, Optional # noqa: H301
from oslo_concurrency import processutils as putils
from oslo_log import log as logging
@ -215,7 +215,7 @@ class ExceptionChainer(BrickException):
def context(self,
catch_exception: bool,
msg: str = '',
*msg_args: Iterable):
*msg_args: Any):
self._catch_exception = catch_exception
self._exc_msg = msg
self._exc_msg_args = list(msg_args)

View File

@ -206,6 +206,23 @@ class Portal(object):
'"devices_on_start"')
return target.get_device_path_by_initial_devices()
def get_all_namespaces_ctrl_paths(self) -> List[str]:
"""Return all nvme sysfs control paths for this portal.
The basename of the path can be single volume or a channel to an ANA
volume.
For example for the nvme1 controller we could return:
['/sys/class/nvme-fabrics/ctl/nvme1n1 ',
'/sys/class/nvme-fabrics/ctl/nvme0c1n1']
"""
if not self.controller:
return []
# Look under the controller, where we will have normal devices and ANA
# channel devices. For nvme1 we could find nvme1n1 or nvme0c1n1)
return glob.glob(f'{NVME_CTRL_SYSFS_PATH}{self.controller}/nvme*')
def get_device_by_property(self,
prop_name: str,
value: str) -> Optional[str]:
@ -219,10 +236,7 @@ class Portal(object):
LOG.debug('Looking for device where %s=%s on controller %s',
prop_name, value, self.controller)
# Look under the controller, where we will have normal devices and ANA
# channel devices. For nvme1 we could find nvme1n1 or nvme0c1n1)
paths = glob.glob(f'{NVME_CTRL_SYSFS_PATH}{self.controller}/nvme*')
for path in paths:
for path in self.get_all_namespaces_ctrl_paths():
prop_value = sysfs_property(prop_name, path)
if prop_value == value:
# Convert path to the namespace device name
@ -236,6 +250,34 @@ class Portal(object):
LOG.debug('No device Found on controller %s', self.controller)
return None
def can_disconnect(self) -> bool:
"""Check if this portal can be disconnected.
A portal can be disconnected if it is connected (has a controller name)
and the subsystem has no namespaces left or if it has only one and it
is from this target.
"""
if not self.controller:
LOG.debug('Portal %s is not present', self)
return False
ns_ctrl_paths = self.get_all_namespaces_ctrl_paths()
num_namespaces = len(ns_ctrl_paths)
# No namespaces => disconnect, >1 ns => can't disconnect
if num_namespaces != 1:
result = not bool(num_namespaces)
LOG.debug('There are %s namespaces on %s so we %s disconnect',
num_namespaces, self, 'can' if result else 'cannot')
return result
# With only 1 namespace, check if it belongs to the portal
# Get the device on this target's portal (may be None)
portal_dev = os.path.basename(self.get_device() or '')
result = portal_dev == nvme_basename(ns_ctrl_paths[0])
LOG.debug("The only namespace on portal %s is %s and %s this target's",
self, portal_dev, "matches" if result else "doesn't match")
return result
class Target(object):
"""Representation of an NVMe-oF Target and some related operations."""
@ -803,13 +845,17 @@ class NVMeOFConnector(base.BaseLinuxConnector):
def connect_volume(
self, connection_properties: NVMeOFConnProps) -> Dict[str, str]:
"""Attach and discover the volume."""
if connection_properties.is_replicated is False:
LOG.debug('Starting non replicated connection')
path = self._connect_target(connection_properties.targets[0])
try:
if connection_properties.is_replicated is False:
LOG.debug('Starting non replicated connection')
path = self._connect_target(connection_properties.targets[0])
else: # If we know it's replicated or we don't yet know
LOG.debug('Starting replicated connection')
path = self._connect_volume_replicated(connection_properties)
else: # If we know it's replicated or we don't yet know
LOG.debug('Starting replicated connection')
path = self._connect_volume_replicated(connection_properties)
except Exception:
self._try_disconnect_all(connection_properties)
raise
return {'type': 'block', 'path': path}
@ -914,13 +960,14 @@ class NVMeOFConnector(base.BaseLinuxConnector):
host_device_paths.append(rep_host_device_path)
except Exception as ex:
LOG.error("_connect_target: %s", ex)
if not host_device_paths:
raise exception.VolumeDeviceNotFound(
device=connection_properties.targets)
if connection_properties.is_replicated:
device_path = self._handle_replicated_volume(host_device_paths,
connection_properties)
device_path = self._handle_replicated_volume(
host_device_paths, connection_properties)
else:
device_path = self._handle_single_replica(
host_device_paths, connection_properties.alias)
@ -1006,22 +1053,64 @@ class NVMeOFConnector(base.BaseLinuxConnector):
conn_props.cinder_volume_id or conn_props.targets[0].nqn)
return
exc = exception.ExceptionChainer()
if not os.path.exists(device_path):
LOG.warning("Trying to disconnect device %(device_path)s, but "
"it is not connected. Skipping.",
{'device_path': device_path})
return
try:
# We assume that raid devices are flushed when ending the raid
if device_path.startswith(RAID_PATH):
# We assume that raid devices are flushed when ending the raid
if device_path.startswith(RAID_PATH):
with exc.context(force, 'Failed to end raid %s', device_path):
self.end_raid(device_path)
else:
else:
with exc.context(force, 'Failed to flush %s', device_path):
self._linuxscsi.flush_device_io(device_path)
except Exception:
self._try_disconnect_all(conn_props, exc)
if exc:
LOG.warning('There were errors removing %s', device_path)
if not ignore_errors:
raise
raise exc
def _try_disconnect_all(
self,
conn_props: NVMeOFConnProps,
exc: Optional[exception.ExceptionChainer] = None) -> None:
"""Disconnect all subsystems that are not being used.
Only sees if it has to disconnect this connection properties portals,
leaves other alone.
Since this is unrelated to the flushing of the devices failures will be
logged but they won't be raised.
"""
if exc is None:
exc = exception.ExceptionChainer()
for target in conn_props.targets:
# Associate each portal with its controller name
target.set_portals_controllers()
for portal in target.portals:
# Ignore exceptions to disconnect as many as possible.
with exc.context(True, 'Failed to disconnect %s', portal):
self._try_disconnect(portal)
def _try_disconnect(self, portal: Portal) -> None:
"""Disconnect a specific subsystem if it's safe.
Only disconnect if it has no namespaces left or has only one left and
it is from this connection.
"""
LOG.debug('Checking if %s can be disconnected', portal)
if portal.can_disconnect():
self._execute('nvme', 'disconnect',
'-d', '/dev/' + portal.controller, # type: ignore
root_helper=self._root_helper, run_as_root=True)
# ####### Extend methods ########

View File

@ -209,12 +209,33 @@ class PortalTestCase(test_base.TestCase):
self.assertEqual('result', res)
mock_get_dev.assert_called_once_with()
@mock.patch('glob.glob')
def test_get_all_namespaces_ctrl_paths(self, mock_glob):
expected = ['/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1',
'/sys/class/nvme-fabrics/ctl/nvme0/nvme1c1n2']
mock_glob.return_value = expected[:]
self.portal.controller = 'nvme0'
res = self.portal.get_all_namespaces_ctrl_paths()
self.assertEqual(expected, res)
mock_glob.assert_called_once_with(
'/sys/class/nvme-fabrics/ctl/nvme0/nvme*')
@mock.patch('glob.glob')
def test_get_all_namespaces_ctrl_paths_no_controller(self, mock_glob):
res = self.portal.get_all_namespaces_ctrl_paths()
self.assertEqual([], res)
mock_glob.assert_not_called()
@mock.patch.object(nvmeof, 'nvme_basename', return_value='nvme1n2')
@mock.patch.object(nvmeof, 'sysfs_property')
@mock.patch('glob.glob')
def test_get_device_by_property(self, mock_glob, mock_property, mock_name):
@mock.patch.object(nvmeof.Portal, 'get_all_namespaces_ctrl_paths')
def test_get_device_by_property(self, mock_paths, mock_property,
mock_name):
"""Searches all devices for the right one and breaks when found."""
mock_glob.return_value = [
mock_paths.return_value = [
'/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1',
'/sys/class/nvme-fabrics/ctl/nvme0/nvme1c1n2',
'/sys/class/nvme-fabrics/ctl/nvme0/nvme0n3'
@ -226,8 +247,7 @@ class PortalTestCase(test_base.TestCase):
self.assertEqual('/dev/nvme1n2', res)
mock_glob.assert_called_once_with(
'/sys/class/nvme-fabrics/ctl/nvme0/nvme*')
mock_paths.assert_called_once_with()
self.assertEqual(2, mock_property.call_count)
mock_property.assert_has_calls(
[mock.call('uuid', '/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1'),
@ -238,12 +258,12 @@ class PortalTestCase(test_base.TestCase):
@mock.patch.object(nvmeof, 'nvme_basename', return_value='nvme1n2')
@mock.patch.object(nvmeof, 'sysfs_property')
@mock.patch('glob.glob')
@mock.patch.object(nvmeof.Portal, 'get_all_namespaces_ctrl_paths')
def test_get_device_by_property_not_found(
self, mock_glob, mock_property, mock_name):
self, mock_paths, mock_property, mock_name):
"""Exhausts devices searching before returning None."""
mock_glob.return_value = ['/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1',
'/sys/class/nvme-fabrics/ctl/nvme0/nvme0n2']
mock_paths.return_value = ['/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1',
'/sys/class/nvme-fabrics/ctl/nvme0/nvme0n2']
mock_property.side_effect = ['uuid1', 'uuid2']
self.portal.controller = 'nvme0'
@ -251,8 +271,7 @@ class PortalTestCase(test_base.TestCase):
self.assertIsNone(res)
mock_glob.assert_called_once_with(
'/sys/class/nvme-fabrics/ctl/nvme0/nvme*')
mock_paths.assert_called_once_with()
self.assertEqual(2, mock_property.call_count)
mock_property.assert_has_calls(
[mock.call('uuid', '/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1'),
@ -260,6 +279,51 @@ class PortalTestCase(test_base.TestCase):
)
mock_name.assert_not_called()
@mock.patch.object(nvmeof.Portal, 'get_all_namespaces_ctrl_paths')
def test__can_disconnect_no_controller_name(self, mock_paths):
"""Cannot disconnect when portal doesn't have a controller."""
res = self.portal.can_disconnect()
self.assertFalse(res)
mock_paths.assert_not_called()
@ddt.data(([], True),
(['/sys/class/nvme-fabrics/ctl/nvme0/nvme0n1',
'/sys/class/nvme-fabrics/ctl/nvme0/nvme0n2'], False))
@ddt.unpack
@mock.patch.object(nvmeof.Portal, 'get_all_namespaces_ctrl_paths')
def test__can_disconnect_not_1_namespace(
self, ctrl_paths, expected, mock_paths):
"""Check if can disconnect when we don't have 1 namespace in subsys."""
self.portal.controller = 'nvme0'
mock_paths.return_value = ctrl_paths
res = self.portal.can_disconnect()
self.assertIs(expected, res)
mock_paths.assert_called_once_with()
@mock.patch.object(nvmeof.Portal, 'get_device')
@mock.patch.object(nvmeof.Portal, 'get_all_namespaces_ctrl_paths')
def test__can_disconnect(self, mock_paths, mock_device):
"""Can disconnect if the namespace is the one from this target.
This tests that even when ANA is enabled it can identify the control
path as belonging to the used device path.
"""
self.portal.controller = 'nvme0'
mock_device.return_value = '/dev/nvme1n2'
mock_paths.return_value = [
'/sys/class/nvme-fabrics/ctl/nvme0/nvme1c1n2']
self.assertTrue(self.portal.can_disconnect())
@mock.patch.object(nvmeof.Portal, 'get_device')
@mock.patch.object(nvmeof.Portal, 'get_all_namespaces_ctrl_paths')
def test__can_disconnect_different_target(self, mock_paths, mock_device):
"""Cannot disconnect if the namespace is from a different target."""
self.portal.controller = 'nvme0'
mock_device.return_value = None
mock_paths.return_value = [
'/sys/class/nvme-fabrics/ctl/nvme0/nvme1c1n2']
self.assertFalse(self.portal.can_disconnect())
@ddt.ddt
class TargetTestCase(test_base.TestCase):
@ -970,23 +1034,45 @@ class NVMeOFConnectorTestCase(test_connector.ConnectorTestCase):
self.connector.get_volume_paths(conn_props))
@mock.patch.object(nvmeof.Target, 'set_portals_controllers', mock.Mock())
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect_all')
@mock.patch.object(nvmeof.NVMeOFConnector, '_connect_target')
def test_connect_volume_not_replicated(self, mock_connect_target):
def test_connect_volume_not_replicated(
self, mock_connect_target, mock_disconnect):
"""Single vol attach."""
connection_properties = volume_replicas[0].copy()
mock_connect_target.return_value = '/dev/nvme0n1'
self.assertEqual({'type': 'block', 'path': '/dev/nvme0n1'},
self.connector.connect_volume(connection_properties))
mock_connect_target.assert_called_with(mock.ANY)
self.assertIsInstance(mock_connect_target.call_args[0][0],
nvmeof.Target)
mock_disconnect.assert_not_called()
@mock.patch.object(nvmeof.Target, 'set_portals_controllers', mock.Mock())
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect_all')
@mock.patch.object(nvmeof.NVMeOFConnector, '_connect_target')
def test_connect_volume_not_replicated_fails(
self, mock_connect_target, mock_disconnect):
"""Single vol attach fails and disconnects on failure."""
connection_properties = volume_replicas[0].copy()
mock_connect_target.side_effect = exception.VolumeDeviceNotFound,
self.assertRaises(exception.VolumeDeviceNotFound,
self.connector.connect_volume,
connection_properties)
mock_connect_target.assert_called_with(mock.ANY)
self.assertIsInstance(mock_connect_target.call_args[0][0],
nvmeof.Target)
mock_disconnect.assert_called_with(mock.ANY)
self.assertIsInstance(mock_disconnect.call_args[0][0],
nvmeof.NVMeOFConnProps)
@mock.patch.object(nvmeof.Target, 'set_portals_controllers', mock.Mock())
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect_all')
@mock.patch.object(nvmeof.NVMeOFConnector, '_connect_volume_replicated')
@mock.patch.object(nvmeof.NVMeOFConnector, '_connect_target')
def test_connect_volume_replicated(
self, mock_connect_target, mock_replicated_volume):
self, mock_connect_target, mock_replicated_volume,
mock_disconnect):
mock_replicated_volume.return_value = '/dev/md/md1'
actual = self.connector.connect_volume(connection_properties)
@ -998,20 +1084,27 @@ class NVMeOFConnectorTestCase(test_connector.ConnectorTestCase):
self.assertIsInstance(mock_replicated_volume.call_args[0][0],
nvmeof.NVMeOFConnProps)
mock_connect_target.assert_not_called()
mock_disconnect.assert_not_called()
@mock.patch.object(nvmeof.Target, 'set_portals_controllers', mock.Mock())
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect_all')
@mock.patch.object(nvmeof.NVMeOFConnector, '_handle_replicated_volume')
@mock.patch.object(nvmeof.NVMeOFConnector, '_connect_target')
def test_connect_volume_replicated_exception(
self, mock_connect_target, mock_replicated_volume):
self, mock_connect_target, mock_replicated_volume,
mock_disconnect):
mock_connect_target.side_effect = Exception()
self.assertRaises(exception.VolumeDeviceNotFound,
self.connector.connect_volume, connection_properties)
mock_disconnect.assert_called_with(mock.ANY)
self.assertIsInstance(mock_disconnect.call_args[0][0],
nvmeof.NVMeOFConnProps)
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect_all')
@mock.patch.object(nvmeof.NVMeOFConnector, 'get_volume_paths')
@mock.patch('os.path.exists', return_value=True)
def test_disconnect_volume_path_not_found(
self, mock_exists, mock_get_paths):
self, mock_exists, mock_get_paths, mock_disconnect):
"""Disconnect can't find device path from conn props and dev info."""
mock_get_paths.return_value = []
res = self.connector.disconnect_volume(connection_properties,
@ -1022,6 +1115,7 @@ class NVMeOFConnectorTestCase(test_connector.ConnectorTestCase):
self.assertIsInstance(mock_get_paths.call_args[0][0],
nvmeof.NVMeOFConnProps)
mock_exists.assert_not_called()
mock_disconnect.assert_not_called()
@mock.patch.object(nvmeof.NVMeOFConnector, 'get_volume_paths')
@mock.patch('os.path.exists', return_value=True)
@ -1040,6 +1134,7 @@ class NVMeOFConnectorTestCase(test_connector.ConnectorTestCase):
nvmeof.NVMeOFConnProps)
mock_exists.assert_called_once_with(dev_path)
@mock.patch.object(nvmeof.Target, 'set_portals_controllers', mock.Mock())
@mock.patch('os_brick.initiator.linuxscsi.LinuxSCSI.flush_device_io')
@mock.patch.object(nvmeof.NVMeOFConnector, 'get_volume_paths')
@mock.patch.object(nvmeof.NVMeOFConnector, 'end_raid')
@ -1060,6 +1155,7 @@ class NVMeOFConnectorTestCase(test_connector.ConnectorTestCase):
mock_end_raid.assert_not_called()
mock_flush.assert_called_with(dev_path)
@mock.patch.object(nvmeof.Target, 'set_portals_controllers', mock.Mock())
@mock.patch('os_brick.initiator.linuxscsi.LinuxSCSI.flush_device_io')
@mock.patch.object(nvmeof.NVMeOFConnector, 'get_volume_paths')
@mock.patch.object(nvmeof.NVMeOFConnector, 'end_raid')
@ -1849,3 +1945,97 @@ class NVMeOFConnectorTestCase(test_connector.ConnectorTestCase):
self.connector.use_multipath = use_multipath
self.connector.native_multipath_supported = ana_support
self.assertIs(result, self.connector._do_multipath())
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect')
@mock.patch.object(nvmeof.Target, 'set_portals_controllers')
def test__try_disconnect_all(self, mock_set_portals, mock_disconnect):
"""Disconnect all portals for all targets in connection properties."""
connection_properties = {
'vol_uuid': VOL_UUID,
'alias': 'raid_alias',
'replica_count': 2,
'volume_replicas': [
{'target_nqn': 'nqn1',
'vol_uuid': VOL_UUID1,
'portals': [['portal1', 'port_value', 'RoCEv2'],
['portal2', 'port_value', 'anything']]},
{'target_nqn': 'nqn2',
'vol_uuid': VOL_UUID2,
'portals': [['portal4', 'port_value', 'anything'],
['portal3', 'port_value', 'RoCEv2']]}
],
}
conn_props = nvmeof.NVMeOFConnProps(connection_properties)
exc = exception.ExceptionChainer()
self.connector._try_disconnect_all(conn_props, exc)
self.assertEqual(2, mock_set_portals.call_count)
mock_set_portals.assert_has_calls((mock.call(), mock.call()))
self.assertEqual(4, mock_disconnect.call_count)
mock_disconnect.assert_has_calls((
mock.call(conn_props.targets[0].portals[0]),
mock.call(conn_props.targets[0].portals[1]),
mock.call(conn_props.targets[1].portals[0]),
mock.call(conn_props.targets[1].portals[1])
))
self.assertFalse(bool(exc))
@mock.patch.object(nvmeof.NVMeOFConnector, '_try_disconnect')
@mock.patch.object(nvmeof.Target, 'set_portals_controllers')
def test__try_disconnect_all_with_failures(
self, mock_set_portals, mock_disconnect):
"""Even with failures it should try to disconnect all portals."""
exc = exception.ExceptionChainer()
mock_disconnect.side_effect = [Exception, None]
self.connector._try_disconnect_all(self.conn_props, exc)
mock_set_portals.assert_called_once_with()
self.assertEqual(3, mock_disconnect.call_count)
mock_disconnect.assert_has_calls((
mock.call(self.conn_props.targets[0].portals[0]),
mock.call(self.conn_props.targets[0].portals[1]),
mock.call(self.conn_props.targets[0].portals[2])
))
self.assertTrue(bool(exc))
@mock.patch.object(nvmeof.NVMeOFConnector, '_execute')
@mock.patch.object(nvmeof.Portal, 'can_disconnect')
def test__try_disconnect(self, mock_can_disconnect, mock_execute):
"""We try to disconnect when we can without breaking other devices."""
mock_can_disconnect.return_value = True
portal = self.conn_props.targets[0].portals[0]
portal.controller = 'nvme0'
self.connector._try_disconnect(portal)
mock_can_disconnect.assert_called_once_with()
mock_execute.assert_called_once_with(
'nvme', 'disconnect', '-d', '/dev/nvme0',
root_helper=self.connector._root_helper, run_as_root=True)
@mock.patch.object(nvmeof.NVMeOFConnector, '_execute')
@mock.patch.object(nvmeof.Portal, 'can_disconnect')
def test__try_disconnect_failure(self, mock_can_disconnect, mock_execute):
"""Confirm disconnect doesn't swallow exceptions."""
mock_can_disconnect.return_value = True
portal = self.conn_props.targets[0].portals[0]
portal.controller = 'nvme0'
mock_execute.side_effect = ValueError
self.assertRaises(ValueError,
self.connector._try_disconnect, portal)
mock_can_disconnect.assert_called_once_with()
mock_execute.assert_called_once_with(
'nvme', 'disconnect', '-d', '/dev/nvme0',
root_helper=self.connector._root_helper, run_as_root=True)
@mock.patch.object(nvmeof.NVMeOFConnector, '_execute')
@mock.patch.object(nvmeof.Portal, 'can_disconnect')
def test__try_disconnect_no_disconnect(
self, mock_can_disconnect, mock_execute):
"""Doesn't disconnect when it would break other devices."""
mock_can_disconnect.return_value = False
portal = self.conn_props.targets[0].portals[0]
self.connector._try_disconnect(portal)
mock_can_disconnect.assert_called_once_with()
mock_execute.assert_not_called()