Wait for multipath device to be ready for I/O

The "multipath -C <mpath_name>" command waits for the multipath
device map to be ready for I/O. This is useful in preventing race
conditions when we are trying to write to the multipath device
before it is ready for I/O.

We added 2 new config options to make the wait time configurable,

1. wait_mpath_device_attempts - defaults to 4 attempts
2. wait_mpath_device_interval - defaults to 1 second

Closes-Bug: #2067949

Change-Id: Ib075ec62a2bf993615c5c802f34acd7838bfa2af
(cherry picked from commit 639f953194)
This commit is contained in:
Rajat Dhasmana 2024-05-27 18:33:54 +05:30
parent 823cda4b8a
commit 80da091dd3
7 changed files with 102 additions and 3 deletions

View File

@ -286,7 +286,12 @@ class FibreChannelConnector(base.BaseLinuxConnector):
if multipath_id:
# only set the multipath_id if we found one
device_info['multipath_id'] = multipath_id
if self.device_name:
device = os.path.basename(self.device_name)
mpath = self._linuxscsi.find_sysfs_multipath_dm(
[device])
# Wait for multipath device to be ready for I/O
self._linuxscsi.wait_for_mpath_device(mpath)
else:
device_path = self.host_device

View File

@ -787,8 +787,11 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
if not mpath:
LOG.warning('No dm was created, connection to volume is probably '
'bad and will perform poorly.')
elif not wwn:
wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
else:
# Wait for multipath device to be ready for I/O
self._linuxscsi.wait_for_mpath_device(mpath)
if not wwn:
wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
assert wwn is not None
return self._get_connect_result(connection_properties, wwn, found,

View File

@ -26,6 +26,7 @@ import time
from typing import Optional
from oslo_concurrency import processutils as putils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import excutils
@ -36,6 +37,7 @@ from os_brick.privileged import rootwrap as priv_rootwrap
from os_brick import utils
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
MULTIPATH_ERROR_REGEX = re.compile(r"\w{3} \d+ \d\d:\d\d:\d\d \|.*$")
MULTIPATH_WWID_REGEX = re.compile(r"\((?P<wwid>.+)\)")
@ -853,3 +855,20 @@ class LinuxSCSI(executor.Executor):
if map_name and self.get_dm_name(mpath):
raise exception.BrickException("Multipath doesn't go away")
LOG.debug('Multipath %s no longer present', mpath)
def wait_for_mpath_device(self, mpath):
"""Wait for multipath device to become ready for I/O.
mpath is the kernel name of the device (dm-*) which is the
expected argument for multipath -C command.
"""
try:
self._execute('multipath', '-C', mpath,
attempts=CONF.os_brick.wait_mpath_device_attempts,
interval=CONF.os_brick.wait_mpath_device_interval,
run_as_root=True,
root_helper=self._root_helper)
except putils.ProcessExecutionError as exc:
LOG.error("Failed to get mpath device %(mpath)s ready for "
"I/O: %(except)s", {'mpath': mpath, 'except': exc})
raise

View File

@ -23,6 +23,24 @@ _opts = [
'for compute nodes, but not for HCI deployments or '
'controllers where Glance uses Cinder as a backend, as '
'locks should use the same directory.'),
cfg.IntOpt('wait_mpath_device_attempts',
default=4,
min=1,
help='Number of attempts for the multipath device to be ready '
'for I/O after it was created. Readiness is checked with '
'``multipath -C``. See related '
'``wait_mpath_device_interval`` config option. Default '
'value is 4.'),
cfg.IntOpt('wait_mpath_device_interval',
default=1,
min=1,
help='Interval value to wait for multipath device to be ready '
'for I/O. Max number of attempts is set in '
'``wait_mpath_device_attempts``. Time in seconds to wait '
'for each retry is ``base ^ attempt * interval``, so for '
'4 attempts (1 attempt 3 retries) and 1 second interval '
'will yield: 2, 4 and 8 seconds. Note that there is no '
'wait before first attempt. Default value is 1.'),
]
cfg.CONF.register_opts(_opts, group='os_brick')

View File

@ -291,6 +291,7 @@ class FibreChannelConnectorTestCase(test_connector.ConnectorTestCase):
connection_info['data'])
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path')
@mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_mpath_device')
def _test_connect_volume_multipath(self, get_device_info_mock,
get_scsi_wwn_mock,
get_fc_hbas_info_mock,
@ -301,6 +302,7 @@ class FibreChannelConnectorTestCase(test_connector.ConnectorTestCase):
find_mp_dev_mock,
access_mode,
should_wait_for_rw,
wait_mpath_device_mock,
find_mp_device_path_mock):
self.connector.use_multipath = True
get_fc_hbas_mock.side_effect = self.fake_get_fc_hbas

View File

@ -1443,3 +1443,39 @@ loop0 0"""
def test_lun_for_addressing_sam3_flat(self, original_lun, expected_lun):
lun = self.linuxscsi.lun_for_addressing(original_lun, 'SAM3-flat')
self.assertEqual(expected_lun, lun)
@mock.patch.object(linuxscsi, 'LOG')
@mock.patch.object(linuxscsi.LinuxSCSI, '_execute')
def test_wait_for_mpath_device(self, exec_mock, mock_log):
exec_mock.return_value = (
"3293379.070675 | /dev/dm-7: path sdb is usable",
None,
)
mpath_name = 'dm-7'
self.linuxscsi.wait_for_mpath_device(mpath_name)
self.assertEqual(1, exec_mock.call_count)
exec_mock.assert_called_once_with(
'multipath', '-C', mpath_name,
attempts=4, interval=1,
run_as_root=True,
root_helper=self.linuxscsi._root_helper)
mock_log.error.assert_not_called()
@mock.patch.object(linuxscsi, 'LOG')
@mock.patch.object(linuxscsi.LinuxSCSI, '_execute')
def test_wait_for_mpath_device_fails(self, exec_mock, mock_log):
exec_mock.side_effect = putils.ProcessExecutionError
mpath_name = 'dm-7'
exc = self.assertRaises(putils.ProcessExecutionError,
self.linuxscsi.wait_for_mpath_device, 'dm-7')
exec_mock.assert_called_once_with(
'multipath', '-C', mpath_name,
attempts=4, interval=1,
run_as_root=True,
root_helper=self.linuxscsi._root_helper)
mock_log.error.assert_called_once_with(
"Failed to get mpath device %(mpath)s ready for "
"I/O: %(except)s", {'mpath': mpath_name, 'except': exc})

View File

@ -0,0 +1,16 @@
---
fixes:
- |
`Bug #2067949 <https://bugs.launchpad.net/cinder/+bug/2067949>`_:
Fixed issue where we try to write into a multipath device and fail
since it is not ready for I/O. Now we wait until the I/O is likely
to succeed.
We introduced 2 new config options to make the wait time
configurable:
* ``wait_mpath_device_attempts``
* ``wait_mpath_device_interval``
These options defaults to 4 attempts and 1 second interval
respectively. See help text of the config options for more
information.