Browse Source

Fix multipath disconnect with path failure

Under certain conditions detaching a multipath device may result on
failure when flushing one of the individual paths, but the disconnect
should have succeeded, because there were other paths available to flush
all the data.

OS-Brick is currently following standard recommended disconnect
mechanism for multipath devices:

- Release all device holders
- Flush multipath
- Flush single paths
- Delete single devices

The problem is that this procedure does an innecessary step, flushing
individual single paths, that may result in an error.

Originally it was thought that the individual flushes were necessary to
prevent data loss, but upon further study of the multipath-tools and the
device-mapper code it was discovered that this is not really the case.

After the multipath flushing has been completed we can be sure that the
data has been successfully sent and acknowledge by the device.

Closes-Bug: #1785669
Change-Id: I10f7fea2d69d5d9011f0d5486863a8d9d8a9696e
(cherry picked from commit d866ee75c2)
changes/79/592579/2
Gorka Eguileor 3 years ago
committed by Jay S. Bryant
parent
commit
b253e31696
  1. 13
      os_brick/initiator/connectors/fibre_channel.py
  2. 2
      os_brick/initiator/connectors/fibre_channel_s390x.py
  3. 18
      os_brick/initiator/connectors/iscsi.py
  4. 50
      os_brick/initiator/linuxscsi.py
  5. 22
      os_brick/tests/initiator/connectors/test_fibre_channel.py
  6. 3
      os_brick/tests/initiator/connectors/test_fibre_channel_s390x.py
  7. 26
      os_brick/tests/initiator/connectors/test_iscsi.py
  8. 86
      os_brick/tests/initiator/test_linuxscsi.py
  9. 10
      releasenotes/notes/fix-multipath-disconnect-819d01e6e981883e.yaml

13
os_brick/initiator/connectors/fibre_channel.py

@ -333,14 +333,21 @@ class FibreChannelConnector(base.BaseLinuxConnector):
devices.append(device_info)
LOG.debug("devices to remove = %s", devices)
self._remove_devices(connection_properties, devices)
self._remove_devices(connection_properties, devices, device_info)
def _remove_devices(self, connection_properties, devices):
def _remove_devices(self, connection_properties, devices, device_info):
# There may have been more than 1 device mounted
# by the kernel for this volume. We have to remove
# all of them
path_used = self._linuxscsi.get_dev_path(connection_properties,
device_info)
was_multipath = '/pci-' not in path_used
for device in devices:
self._linuxscsi.remove_scsi_device(device["device"])
device_path = device['device']
flush = self._linuxscsi.requires_flush(device_path,
path_used,
was_multipath)
self._linuxscsi.remove_scsi_device(device_path, flush=flush)
def _get_pci_num(self, hba):
# NOTE(walter-boring)

2
os_brick/initiator/connectors/fibre_channel_s390x.py

@ -87,7 +87,7 @@ class FibreChannelConnectorS390X(fibre_channel.FibreChannelConnector):
]
return host_device
def _remove_devices(self, connection_properties, devices):
def _remove_devices(self, connection_properties, devices, device_info):
hbas = self._linuxfc.get_fc_hbas_info()
ports = connection_properties['target_wwn']
possible_devs = self._get_possible_devices(hbas, ports)

18
os_brick/initiator/connectors/iscsi.py

@ -845,10 +845,12 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
:type ignore_errors: bool
"""
return self._cleanup_connection(connection_properties, force=force,
ignore_errors=ignore_errors)
ignore_errors=ignore_errors,
device_info=device_info)
def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
force=False, ignore_errors=False):
force=False, ignore_errors=False,
device_info=None):
"""Cleans up connection flushing and removing devices and multipath.
:param connection_properties: The dictionary that describes all
@ -864,6 +866,7 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
:param ignore_errors: When force is True, this will decide whether to
ignore errors or raise an exception once finished
the operation. Default is False.
:param device_info: Attached device information.
:type ignore_errors: bool
"""
exc = exception.ExceptionChainer()
@ -880,9 +883,14 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
remove_devices = set()
for remove, __ in devices_map.values():
remove_devices.update(remove)
multipath_name = self._linuxscsi.remove_connection(remove_devices,
self.use_multipath,
force, exc)
path_used = self._linuxscsi.get_dev_path(connection_properties,
device_info)
was_multipath = (path_used.startswith('/dev/dm-') or
'mpath' in path_used)
multipath_name = self._linuxscsi.remove_connection(
remove_devices, self.use_multipath, force, exc,
path_used, was_multipath)
# Disconnect sessions and remove nodes that are left without devices
disconnect = [conn for conn, (__, keep) in devices_map.items()

50
os_brick/initiator/linuxscsi.py

@ -60,14 +60,16 @@ class LinuxSCSI(executor.Executor):
else:
return None
def remove_scsi_device(self, device, force=False, exc=None):
def remove_scsi_device(self, device, force=False, exc=None,
flush=True):
"""Removes a scsi device based upon /dev/sdX name."""
path = "/sys/block/%s/device/delete" % device.replace("/dev/", "")
if os.path.exists(path):
exc = exception.ExceptionChainer() if exc is None else exc
# flush any outstanding IO first
with exc.context(force, 'Flushing %s failed', device):
self.flush_device_io(device)
if flush:
# flush any outstanding IO first
with exc.context(force, 'Flushing %s failed', device):
self.flush_device_io(device)
LOG.debug("Remove SCSI device %(device)s with %(path)s",
{'device': device, 'path': path})
@ -193,14 +195,48 @@ class LinuxSCSI(executor.Executor):
return dm
return None
@staticmethod
def get_dev_path(connection_properties, device_info):
"""Determine what path was used by Nova/Cinder to access volume."""
if device_info and device_info.get('path'):
return device_info.get('path')
return connection_properties.get('device_path') or ''
@staticmethod
def requires_flush(path, path_used, was_multipath):
"""Check if a device needs to be flushed when detaching.
A device representing a single path connection to a volume must only be
flushed if it has been used directly by Nova or Cinder to write data.
If the path has been used via a multipath DM or if the device was part
of a multipath but a different single path was used for I/O (instead of
the multipath) then we don't need to flush.
"""
# No used path happens on failed attachs, when we don't care about
# individual flushes.
# When Nova used a multipath we don't need to do individual flushes.
if not path_used or was_multipath:
return False
# We need to flush the single path that was used.
# For encrypted volumes the symlink has been replaced, so realpath
# won't return device under /dev but under /dev/disk/...
path = os.path.realpath(path)
path_used = os.path.realpath(path_used)
return path_used == path or '/dev' != os.path.split(path_used)[0]
def remove_connection(self, devices_names, is_multipath, force=False,
exc=None):
exc=None, path_used=None, was_multipath=False):
"""Remove LUNs and multipath associated with devices names.
:param devices_names: Iterable with real device names ('sda', 'sdb')
:param is_multipath: Whether this is a multipath connection or not
:param force: Whether to forcefully disconnect even if flush fails.
:param exc: ExceptionChainer where to add exceptions if forcing
:param path_used: What path was used by Nova/Cinder for I/O
:param was_multipath: If the path used for I/O was a multipath
:returns: Multipath device map name if found and not flushed
"""
if not devices_names:
@ -220,7 +256,9 @@ class LinuxSCSI(executor.Executor):
multipath_name = None
for device_name in devices_names:
self.remove_scsi_device('/dev/' + device_name, force, exc)
dev_path = '/dev/' + device_name
flush = self.requires_flush(dev_path, path_used, was_multipath)
self.remove_scsi_device(dev_path, force, exc, flush)
# Wait until the symlinks are removed
with exc.context(force, 'Some devices remain from %s', devices_names):

22
os_brick/tests/initiator/connectors/test_fibre_channel.py

@ -259,9 +259,7 @@ class FibreChannelConnectorTestCase(test_connector.ConnectorTestCase):
devices['devices'][0])
expected_commands = [
'multipath -f ' + find_mp_device_path_mock.return_value,
'blockdev --flushbufs /dev/sdb',
'tee -a /sys/block/sdb/device/delete',
'blockdev --flushbufs /dev/sdc',
'tee -a /sys/block/sdc/device/delete',
]
self.assertEqual(expected_commands, self.cmds)
@ -617,3 +615,23 @@ class FibreChannelConnectorTestCase(test_connector.ConnectorTestCase):
self.assertIn('initiator_target_lun_map', connection_info)
self.assertEqual(expected_map,
connection_info['initiator_target_lun_map'])
@ddt.data(('/dev/mapper/<WWN>', True),
('/dev/mapper/mpath0', True),
('/dev/disk/by-path/pci-1-fc-1-lun-1', False))
@ddt.unpack
@mock.patch('os_brick.initiator.linuxscsi.LinuxSCSI.remove_scsi_device')
@mock.patch('os_brick.initiator.linuxscsi.LinuxSCSI.requires_flush')
@mock.patch('os_brick.initiator.linuxscsi.LinuxSCSI.get_dev_path')
def test__remove_devices(self, path_used, was_multipath, get_dev_path_mock,
flush_mock, remove_mock):
get_dev_path_mock.return_value = path_used
self.connector._remove_devices(mock.sentinel.con_props,
[{'device': '/dev/sda'}],
mock.sentinel.device_info)
get_dev_path_mock.assert_called_once_with(mock.sentinel.con_props,
mock.sentinel.device_info)
flush_mock.assert_called_once_with('/dev/sda', path_used,
was_multipath)
remove_mock.assert_called_once_with('/dev/sda',
flush=flush_mock.return_value)

3
os_brick/tests/initiator/connectors/test_fibre_channel_s390x.py

@ -68,7 +68,8 @@ class FibreChannelConnectorS390XTestCase(test_connector.ConnectorTestCase):
def test_remove_devices(self, mock_deconfigure_scsi_device,
mock_get_fc_hbas_info, mock_get_possible_devices):
connection_properties = {'target_wwn': 5, 'target_lun': 2}
self.connector._remove_devices(connection_properties, devices=None)
self.connector._remove_devices(connection_properties, devices=None,
device_info=None)
mock_deconfigure_scsi_device.assert_called_with(3, 5,
"0x0002000000000000")
mock_get_fc_hbas_info.assert_called_once_with()

26
os_brick/tests/initiator/connectors/test_iscsi.py

@ -538,7 +538,8 @@ class ISCSIConnectorTestCase(test_connector.ConnectorTestCase):
cleanup_mock.assert_called_once_with(
mock.sentinel.con_props,
force=mock.sentinel.Force,
ignore_errors=mock.sentinel.ignore_errors)
ignore_errors=mock.sentinel.ignore_errors,
device_info=mock.sentinel.dev_info)
@ddt.data(True, False)
@mock.patch.object(iscsi.ISCSIConnector, '_get_transport')
@ -649,13 +650,21 @@ class ISCSIConnectorTestCase(test_connector.ConnectorTestCase):
['-m', 'discoverydb', '-o', 'show', '-P', 1])
transport_mock.assert_called_once_with()
@ddt.data(('/dev/sda', False),
('/dev/disk/by-id/scsi-WWID', False),
('/dev/dm-11', True),
('/dev/disk/by-id/dm-uuid-mpath-MPATH', True))
@ddt.unpack
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_dev_path')
@mock.patch.object(iscsi.ISCSIConnector, '_disconnect_connection')
@mock.patch.object(iscsi.ISCSIConnector, '_get_connection_devices')
@mock.patch.object(linuxscsi.LinuxSCSI, 'flush_multipath_device')
@mock.patch.object(linuxscsi.LinuxSCSI, 'remove_connection',
return_value=None)
def test_cleanup_connection(self, remove_mock, flush_mock, con_devs_mock,
discon_mock):
def test_cleanup_connection(self, path_used, was_multipath, remove_mock,
flush_mock, con_devs_mock, discon_mock,
get_dev_path_mock):
get_dev_path_mock.return_value = path_used
# Return an ordered dicts instead of normal dict for discon_mock.assert
con_devs_mock.return_value = collections.OrderedDict((
(('ip1:port1', 'tgt1'), ({'sda'}, set())),
@ -666,12 +675,16 @@ class ISCSIConnectorTestCase(test_connector.ConnectorTestCase):
'use_multipath') as use_mp_mock:
self.connector._cleanup_connection(
self.CON_PROPS, ips_iqns_luns=mock.sentinel.ips_iqns_luns,
force=False, ignore_errors=False)
force=False, ignore_errors=False,
device_info=mock.sentinel.device_info)
get_dev_path_mock.called_once_with(self.CON_PROPS,
mock.sentinel.device_info)
con_devs_mock.assert_called_once_with(self.CON_PROPS,
mock.sentinel.ips_iqns_luns)
remove_mock.assert_called_once_with({'sda', 'sdb'}, use_mp_mock,
False, mock.ANY)
False, mock.ANY,
path_used, was_multipath)
discon_mock.assert_called_once_with(
self.CON_PROPS,
[('ip1:port1', 'tgt1'), ('ip3:port3', 'tgt3')],
@ -707,7 +720,8 @@ class ISCSIConnectorTestCase(test_connector.ConnectorTestCase):
con_devs_mock.assert_called_once_with(self.CON_PROPS,
mock.sentinel.ips_iqns_luns)
remove_mock.assert_called_once_with({'sda', 'sdb'}, use_mp_mock,
mock.sentinel.force, mock.ANY)
mock.sentinel.force, mock.ANY,
'', False)
discon_mock.assert_called_once_with(
self.CON_PROPS,
[('ip1:port1', 'tgt1'), ('ip3:port3', 'tgt3')],

86
os_brick/tests/initiator/test_linuxscsi.py

@ -33,6 +33,7 @@ class LinuxSCSITestCase(base.TestCase):
def setUp(self):
super(LinuxSCSITestCase, self).setUp()
self.cmds = []
self.realpath = os.path.realpath
self.mock_object(os.path, 'realpath', return_value='/dev/sdc')
self.mock_object(os, 'stat', returns=os.stat(__file__))
self.linuxscsi = linuxscsi.LinuxSCSI(None, execute=self.fake_execute)
@ -91,6 +92,16 @@ class LinuxSCSITestCase(base.TestCase):
flush_mock.assert_called_once_with(device)
echo_mock.assert_called_once_with('/sys/block/sdc/device/delete', '1')
@mock.patch.object(os.path, 'exists', return_value=False)
def test_remove_scsi_device_no_flush(self, exists_mock):
self.linuxscsi.remove_scsi_device("/dev/sdc")
expected_commands = []
self.assertEqual(expected_commands, self.cmds)
exists_mock.return_value = True
self.linuxscsi.remove_scsi_device("/dev/sdc", flush=False)
expected_commands = [('tee -a /sys/block/sdc/device/delete')]
self.assertEqual(expected_commands, self.cmds)
@mock.patch('time.sleep')
@mock.patch('os.path.exists', return_value=True)
def test_wait_for_volumes_removal_failure(self, exists_mock, sleep_mock):
@ -243,8 +254,8 @@ class LinuxSCSITestCase(base.TestCase):
self.assertEqual(get_dm_name_mock.return_value if do_raise else None,
mp_name)
remove_mock.assert_has_calls([
mock.call('/dev/sda', mock.sentinel.Force, exc),
mock.call('/dev/sdb', mock.sentinel.Force, exc)])
mock.call('/dev/sda', mock.sentinel.Force, exc, False),
mock.call('/dev/sdb', mock.sentinel.Force, exc, False)])
wait_mock.assert_called_once_with(devices_names)
self.assertEqual(do_raise, bool(exc))
remove_link_mock.assert_called_once_with(devices_names)
@ -285,8 +296,28 @@ class LinuxSCSITestCase(base.TestCase):
force=mock.sentinel.Force,
exc=exc)
remove_mock.assert_has_calls(
[mock.call('/dev/sda', mock.sentinel.Force, exc),
mock.call('/dev/sdb', mock.sentinel.Force, exc)])
[mock.call('/dev/sda', mock.sentinel.Force, exc, False),
mock.call('/dev/sdb', mock.sentinel.Force, exc, False)])
wait_mock.assert_called_once_with(devices_names)
remove_link_mock.assert_called_once_with(devices_names)
@mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks')
@mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_volumes_removal')
@mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device')
def test_remove_connection_singlepath_used(self, remove_mock, wait_mock,
remove_link_mock):
devices_names = ('sda', 'sdb')
exc = exception.ExceptionChainer()
# realpath was mocked on test setup
with mock.patch('os.path.realpath', side_effect=self.realpath):
self.linuxscsi.remove_connection(devices_names, is_multipath=True,
force=mock.sentinel.Force,
exc=exc, path_used='/dev/sdb',
was_multipath=False)
remove_mock.assert_has_calls(
[mock.call('/dev/sda', mock.sentinel.Force, exc, False),
mock.call('/dev/sdb', mock.sentinel.Force, exc, True)])
wait_mock.assert_called_once_with(devices_names)
remove_link_mock.assert_called_once_with(devices_names)
@ -991,3 +1022,50 @@ loop0 0"""
def test_multipath_add_path(self):
self.linuxscsi.multipath_add_path('/dev/sda')
self.assertEqual(['multipathd add path /dev/sda'], self.cmds)
@ddt.data({'con_props': {}, 'dev_info': {'path': mock.sentinel.path}},
{'con_props': None, 'dev_info': {'path': mock.sentinel.path}},
{'con_props': {'device_path': mock.sentinel.device_path},
'dev_info': {'path': mock.sentinel.path}})
@ddt.unpack
def test_get_dev_path_device_info(self, con_props, dev_info):
self.assertEqual(mock.sentinel.path,
self.linuxscsi.get_dev_path(con_props, dev_info))
@ddt.data({'con_props': {'device_path': mock.sentinel.device_path},
'dev_info': {'path': None}},
{'con_props': {'device_path': mock.sentinel.device_path},
'dev_info': {'path': ''}},
{'con_props': {'device_path': mock.sentinel.device_path},
'dev_info': {}},
{'con_props': {'device_path': mock.sentinel.device_path},
'dev_info': None})
@ddt.unpack
def test_get_dev_path_conn_props(self, con_props, dev_info):
self.assertEqual(mock.sentinel.device_path,
self.linuxscsi.get_dev_path(con_props, dev_info))
@ddt.data({'con_props': {'device_path': ''}, 'dev_info': {'path': None}},
{'con_props': {'device_path': None}, 'dev_info': {'path': ''}},
{'con_props': {}, 'dev_info': {}},
{'con_props': {}, 'dev_info': None})
@ddt.unpack
def test_get_dev_path_no_path(self, con_props, dev_info):
self.assertEqual('', self.linuxscsi.get_dev_path(con_props, dev_info))
@ddt.data(('/dev/sda', '/dev/sda', True, False, None),
('/dev/sda', '', True, False, None),
('/dev/link_sda', '/dev/link_sdb', False, False, ('/dev/sda',
'/dev/sdb')),
('/dev/link_sda', '/dev/link2_sda', False, True, ('/dev/sda',
'/dev/sda')))
@ddt.unpack
def test_requires_flush(self, path, path_used, was_multipath, expected,
real_paths):
with mock.patch('os.path.realpath', side_effect=real_paths) as mocked:
self.assertEqual(
expected,
self.linuxscsi.requires_flush(path, path_used, was_multipath))
if real_paths:
mocked.assert_has_calls([mock.call(path),
mock.call(path_used)])

10
releasenotes/notes/fix-multipath-disconnect-819d01e6e981883e.yaml

@ -0,0 +1,10 @@
---
fixes:
- |
Under certain conditions detaching a multipath device may result in
failure when flushing one of the individual paths, but the disconnect
should have succeeded, because there were other paths available to flush
all the data.
The multipath disconnect mechanism is now more robust and will only fail
when disconnecting if multipath would lose data.
Loading…
Cancel
Save