From 1435059da5c3b66e83ec9108cd85d17471a8d747 Mon Sep 17 00:00:00 2001 From: Rajat Dhasmana Date: Mon, 31 Jul 2023 09:14:59 +0000 Subject: [PATCH] Check paths before extending device In a multipath enabled deployment, when we try to extend a volume and some paths are down, we fail to extend the multipath device and leave the environment in an inconsistent state. See LP Bug #2032177 for more details. To handle this, we check if all the paths are up before trying to extend the device and fail fast if any path is down. This ensures we don't partially extend some paths and leave the other to the original size leading to inconsistent state in the environment. Closes-Bug: 2032177 Change-Id: I5fc02efc5e9657821a1335f1c1ac5fe036e9329a --- os_brick/initiator/connectors/base.py | 18 +-------- os_brick/initiator/linuxscsi.py | 9 +++++ os_brick/tests/initiator/test_linuxscsi.py | 39 ++++++++++++++++--- os_brick/tests/test_utils.py | 25 ++++++++++++ os_brick/utils.py | 16 ++++++++ ...fix-extend-multipath-a308d333061665fd.yaml | 9 +++++ 6 files changed, 95 insertions(+), 21 deletions(-) create mode 100644 releasenotes/notes/fix-extend-multipath-a308d333061665fd.yaml diff --git a/os_brick/initiator/connectors/base.py b/os_brick/initiator/connectors/base.py index 1c856db6e..480218e63 100644 --- a/os_brick/initiator/connectors/base.py +++ b/os_brick/initiator/connectors/base.py @@ -22,7 +22,6 @@ import typing from typing import Optional from oslo_concurrency import lockutils -from oslo_concurrency import processutils as putils from oslo_config import cfg from oslo_log import log as logging from oslo_utils import reflection @@ -33,6 +32,7 @@ from os_brick import initiator from os_brick.initiator import host_driver from os_brick.initiator import initiator_connector from os_brick.initiator import linuxscsi +from os_brick import utils LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -124,21 +124,7 @@ class BaseLinuxConnector(initiator_connector.InitiatorConnector): return props def check_valid_device(self, path: str, run_as_root: bool = True) -> bool: - cmd = ('dd', 'if=%(path)s' % {"path": path}, - 'of=/dev/null', 'count=1') - out, info = None, None - try: - out, info = self._execute(*cmd, run_as_root=run_as_root, - root_helper=self._root_helper) - except putils.ProcessExecutionError as e: - LOG.error("Failed to access the device on the path " - "%(path)s: %(error)s.", - {"path": path, "error": e.stderr}) - return False - # If the info is none, the path does not exist. - if info is None: - return False - return True + return utils.check_valid_device(self, path) def get_all_available_volumes( self, diff --git a/os_brick/initiator/linuxscsi.py b/os_brick/initiator/linuxscsi.py index 38d824152..4bf53c304 100644 --- a/os_brick/initiator/linuxscsi.py +++ b/os_brick/initiator/linuxscsi.py @@ -679,6 +679,15 @@ class LinuxSCSI(executor.Executor): This function tries to signal the local system's kernel that an already attached volume might have been resized. """ + # We need all paths up before extending the devices. + # see Launchpad Bug: #2032177 for more details. + LOG.debug("Checking paths are valid %s", volume_paths) + for volume_path in volume_paths: + if not utils.check_valid_device(self, volume_path): + LOG.error("Path status is down for path %s", volume_path) + raise exception.BrickException("All paths need to be up " + "to extend the device.") + LOG.debug("extend volume %s", volume_paths) for volume_path in volume_paths: diff --git a/os_brick/tests/initiator/test_linuxscsi.py b/os_brick/tests/initiator/test_linuxscsi.py index d6f8a845d..a8d7bf817 100644 --- a/os_brick/tests/initiator/test_linuxscsi.py +++ b/os_brick/tests/initiator/test_linuxscsi.py @@ -770,6 +770,7 @@ loop0 0""" expected_commands = ['multipathd resize map %s' % dm_path] self.assertEqual(expected_commands, self.cmds) + @mock.patch('os_brick.utils.check_valid_device') @mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path') @mock.patch.object(linuxscsi.LinuxSCSI, 'get_scsi_wwn') @mock.patch('os_brick.utils.get_device_size') @@ -777,7 +778,8 @@ loop0 0""" def test_extend_volume_no_mpath(self, mock_device_info, mock_device_size, mock_scsi_wwn, - mock_find_mpath_path): + mock_find_mpath_path, + mock_valid_dev): """Test extending a volume where there is no multipath device.""" fake_device = {'host': '0', 'channel': '0', @@ -792,6 +794,7 @@ loop0 0""" wwn = '1234567890123456' mock_scsi_wwn.return_value = wwn mock_find_mpath_path.return_value = None + mock_valid_dev.return_value = True ret_size = self.linuxscsi.extend_volume(['/dev/fake']) self.assertEqual(second_size, ret_size) @@ -800,6 +803,7 @@ loop0 0""" expected_cmds = ['tee -a /sys/bus/scsi/drivers/sd/0:0:0:1/rescan'] self.assertEqual(expected_cmds, self.cmds) + @mock.patch('os_brick.utils.check_valid_device') @mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path') @mock.patch.object(linuxscsi.LinuxSCSI, 'get_scsi_wwn') @mock.patch('os_brick.utils.get_device_size') @@ -809,7 +813,8 @@ loop0 0""" mock_device_info, mock_device_size, mock_scsi_wwn, - mock_find_mpath_path): + mock_find_mpath_path, + mock_valid_dev): """Test extending a volume where there is a multipath device.""" mock_device_info.side_effect = [{'host': host, 'channel': '0', @@ -823,6 +828,7 @@ loop0 0""" mock_find_mpath_path.return_value = mpath_path dm_path = '/dev/dm-5' mock_realpath.return_value = dm_path + mock_valid_dev.return_value = True ret_size = self.linuxscsi.extend_volume(['/dev/fake1', '/dev/fake2'], use_multipath=True) @@ -836,6 +842,7 @@ loop0 0""" self.assertEqual(expected_cmds, self.cmds) mock_realpath.assert_called_once_with(mpath_path) + @mock.patch('os_brick.utils.check_valid_device') @mock.patch.object(linuxscsi.LinuxSCSI, '_multipath_resize_map') @mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path') @mock.patch.object(linuxscsi.LinuxSCSI, 'get_scsi_wwn') @@ -845,7 +852,8 @@ loop0 0""" mock_device_size, mock_scsi_wwn, mock_find_mpath_path, - mock_mpath_resize_map): + mock_mpath_resize_map, + mock_valid_dev): """Test extending a volume where there is a multipath device fail.""" mock_device_info.side_effect = [{'host': host, 'channel': '0', @@ -860,6 +868,7 @@ loop0 0""" mock_mpath_resize_map.side_effect = putils.ProcessExecutionError( stdout="fail") + mock_valid_dev.return_value = True self.assertRaises( putils.ProcessExecutionError, @@ -873,6 +882,7 @@ loop0 0""" 'multipathd reconfigure'] self.assertEqual(expected_cmds, self.cmds) + @mock.patch('os_brick.utils.check_valid_device') @mock.patch('time.sleep') @mock.patch.object(linuxscsi.LinuxSCSI, '_multipath_resize_map') @mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path') @@ -886,7 +896,8 @@ loop0 0""" mock_scsi_wwn, mock_find_mpath_path, mock_mpath_resize_map, - mock_sleep): + mock_sleep, + mock_valid_dev): """Test extending a volume where there is a multipath device fail.""" mock_device_info.side_effect = [{'host': host, 'channel': '0', @@ -903,6 +914,7 @@ loop0 0""" mock_mpath_resize_map.side_effect = ( putils.ProcessExecutionError(stdout="timeout"), "success") + mock_valid_dev.return_value = True ret_size = self.linuxscsi.extend_volume(['/dev/fake1', '/dev/fake2'], use_multipath=True) @@ -916,6 +928,7 @@ loop0 0""" mock_mpath_resize_map.assert_has_calls([mock.call(dm_path)] * 2) mock_realpath.assert_called_once_with(mpath_path) + @mock.patch('os_brick.utils.check_valid_device') @mock.patch('time.sleep') @mock.patch('time.time') @mock.patch.object(linuxscsi.LinuxSCSI, '_multipath_resize_map') @@ -929,7 +942,8 @@ loop0 0""" mock_find_mpath_path, mock_mpath_resize_map, mock_currtime, - mock_sleep): + mock_sleep, + mock_valid_dev): """Test extending a volume where there is a multipath device fail.""" mock_device_info.side_effect = [{'host': host, 'channel': '0', @@ -941,6 +955,7 @@ loop0 0""" mock_scsi_wwn.return_value = wwn mock_find_mpath_path.return_value = ('/dev/mapper/dm-uuid-mpath-%s' % wwn) + mock_valid_dev.return_value = True # time.time is used to check if our own timeout has been exceeded, # which is why it has to be mocked. @@ -963,6 +978,20 @@ loop0 0""" ['/dev/fake1', '/dev/fake2'], use_multipath=True) + @mock.patch('os_brick.utils.check_valid_device') + def test_extend_volume_with_mpath_path_down(self, mock_valid_dev): + """Test extending a volume where there is a path down.""" + mock_valid_dev.return_value = False + dev1 = '/dev/fake1' + dev2 = '/dev/fake2' + + self.assertRaises( + exception.BrickException, + self.linuxscsi.extend_volume, + [dev1, dev2], + use_multipath=True) + mock_valid_dev.assert_called_once_with(self.linuxscsi, dev1) + def test_process_lun_id_list(self): lun_list = [2, 255, 88, 370, 5, 256] result = self.linuxscsi.process_lun_id(lun_list) diff --git a/os_brick/tests/test_utils.py b/os_brick/tests/test_utils.py index 8dab15f04..d7d65f6d9 100644 --- a/os_brick/tests/test_utils.py +++ b/os_brick/tests/test_utils.py @@ -83,6 +83,31 @@ class TestUtils(base.TestCase): mock_create.assert_not_called() self.assertIsNone(result) + @ddt.data(('fake_info', True), (None, False)) + @ddt.unpack + def test_check_valid_device(self, fake_info, expected): + mock_execute = mock.Mock() + mock_execute._execute.return_value = ('fake_out', fake_info) + + fake_path = '/dev/fake' + is_valid = utils.check_valid_device(mock_execute, fake_path) + self.assertEqual(expected, is_valid) + mock_execute._execute.assert_called_once_with( + 'dd', 'if=/dev/fake', 'of=/dev/null', 'count=1', + run_as_root=True, root_helper=mock_execute._root_helper) + + def test_check_valid_device_error(self): + mock_execute = mock.Mock() + p_exception = utils.processutils.ProcessExecutionError + mock_execute._execute.side_effect = p_exception + + fake_path = '/dev/fake' + is_valid = utils.check_valid_device(mock_execute, fake_path) + self.assertEqual(False, is_valid) + mock_execute._execute.assert_called_once_with( + 'dd', 'if=/dev/fake', 'of=/dev/null', 'count=1', + run_as_root=True, root_helper=mock_execute._root_helper) + class TestRetryDecorator(base.TestCase): diff --git a/os_brick/utils.py b/os_brick/utils.py index b8af536de..8330c9b29 100644 --- a/os_brick/utils.py +++ b/os_brick/utils.py @@ -431,6 +431,22 @@ def get_device_size(executor: executor.Executor, device: str) -> Optional[int]: return None +def check_valid_device(executor: executor.Executor, path: str) -> bool: + cmd = ('dd', 'if=%(path)s' % {"path": path}, + 'of=/dev/null', 'count=1') + out, info = None, None + try: + out, info = executor._execute(*cmd, run_as_root=True, + root_helper=executor._root_helper) + except processutils.ProcessExecutionError as e: + LOG.error("Failed to access the device on the path " + "%(path)s: %(error)s.", + {"path": path, "error": e.stderr}) + return False + # If the info is none, the path does not exist. + return info is not None + + class Anything(object): """Object equal to everything.""" def __eq__(self, other): diff --git a/releasenotes/notes/fix-extend-multipath-a308d333061665fd.yaml b/releasenotes/notes/fix-extend-multipath-a308d333061665fd.yaml new file mode 100644 index 000000000..a484bbf78 --- /dev/null +++ b/releasenotes/notes/fix-extend-multipath-a308d333061665fd.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + `Bug #2032177 `_: + Handled the issue when one or more paths of a multipath device are down and + we try to extend the device. This is addressed by adding an additional check + to verify that all devices are up before extending the multipath device to + avoid leaving devices with inconsistent sizes. +