Wait for multipath device to be ready for I/O
The "multipath -C <mpath_name>" command waits for the multipath
device map to be ready for I/O. This is useful in preventing race
conditions when we are trying to write to the multipath device
before it is ready for I/O.
We added 2 new config options to make the wait time configurable,
1. wait_mpath_device_attempts - defaults to 4 attempts
2. wait_mpath_device_interval - defaults to 1 second
Closes-Bug: #2067949
Change-Id: Ib075ec62a2bf993615c5c802f34acd7838bfa2af
(cherry picked from commit 639f953194
)
This commit is contained in:
parent
823cda4b8a
commit
80da091dd3
@ -286,7 +286,12 @@ class FibreChannelConnector(base.BaseLinuxConnector):
|
||||
if multipath_id:
|
||||
# only set the multipath_id if we found one
|
||||
device_info['multipath_id'] = multipath_id
|
||||
|
||||
if self.device_name:
|
||||
device = os.path.basename(self.device_name)
|
||||
mpath = self._linuxscsi.find_sysfs_multipath_dm(
|
||||
[device])
|
||||
# Wait for multipath device to be ready for I/O
|
||||
self._linuxscsi.wait_for_mpath_device(mpath)
|
||||
else:
|
||||
device_path = self.host_device
|
||||
|
||||
|
@ -787,8 +787,11 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
|
||||
if not mpath:
|
||||
LOG.warning('No dm was created, connection to volume is probably '
|
||||
'bad and will perform poorly.')
|
||||
elif not wwn:
|
||||
wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
|
||||
else:
|
||||
# Wait for multipath device to be ready for I/O
|
||||
self._linuxscsi.wait_for_mpath_device(mpath)
|
||||
if not wwn:
|
||||
wwn = self._linuxscsi.get_sysfs_wwn(found, mpath)
|
||||
|
||||
assert wwn is not None
|
||||
return self._get_connect_result(connection_properties, wwn, found,
|
||||
|
@ -26,6 +26,7 @@ import time
|
||||
from typing import Optional
|
||||
|
||||
from oslo_concurrency import processutils as putils
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import excutils
|
||||
|
||||
@ -36,6 +37,7 @@ from os_brick.privileged import rootwrap as priv_rootwrap
|
||||
from os_brick import utils
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
CONF = cfg.CONF
|
||||
|
||||
MULTIPATH_ERROR_REGEX = re.compile(r"\w{3} \d+ \d\d:\d\d:\d\d \|.*$")
|
||||
MULTIPATH_WWID_REGEX = re.compile(r"\((?P<wwid>.+)\)")
|
||||
@ -853,3 +855,20 @@ class LinuxSCSI(executor.Executor):
|
||||
if map_name and self.get_dm_name(mpath):
|
||||
raise exception.BrickException("Multipath doesn't go away")
|
||||
LOG.debug('Multipath %s no longer present', mpath)
|
||||
|
||||
def wait_for_mpath_device(self, mpath):
|
||||
"""Wait for multipath device to become ready for I/O.
|
||||
|
||||
mpath is the kernel name of the device (dm-*) which is the
|
||||
expected argument for multipath -C command.
|
||||
"""
|
||||
try:
|
||||
self._execute('multipath', '-C', mpath,
|
||||
attempts=CONF.os_brick.wait_mpath_device_attempts,
|
||||
interval=CONF.os_brick.wait_mpath_device_interval,
|
||||
run_as_root=True,
|
||||
root_helper=self._root_helper)
|
||||
except putils.ProcessExecutionError as exc:
|
||||
LOG.error("Failed to get mpath device %(mpath)s ready for "
|
||||
"I/O: %(except)s", {'mpath': mpath, 'except': exc})
|
||||
raise
|
||||
|
@ -23,6 +23,24 @@ _opts = [
|
||||
'for compute nodes, but not for HCI deployments or '
|
||||
'controllers where Glance uses Cinder as a backend, as '
|
||||
'locks should use the same directory.'),
|
||||
cfg.IntOpt('wait_mpath_device_attempts',
|
||||
default=4,
|
||||
min=1,
|
||||
help='Number of attempts for the multipath device to be ready '
|
||||
'for I/O after it was created. Readiness is checked with '
|
||||
'``multipath -C``. See related '
|
||||
'``wait_mpath_device_interval`` config option. Default '
|
||||
'value is 4.'),
|
||||
cfg.IntOpt('wait_mpath_device_interval',
|
||||
default=1,
|
||||
min=1,
|
||||
help='Interval value to wait for multipath device to be ready '
|
||||
'for I/O. Max number of attempts is set in '
|
||||
'``wait_mpath_device_attempts``. Time in seconds to wait '
|
||||
'for each retry is ``base ^ attempt * interval``, so for '
|
||||
'4 attempts (1 attempt 3 retries) and 1 second interval '
|
||||
'will yield: 2, 4 and 8 seconds. Note that there is no '
|
||||
'wait before first attempt. Default value is 1.'),
|
||||
]
|
||||
|
||||
cfg.CONF.register_opts(_opts, group='os_brick')
|
||||
|
@ -291,6 +291,7 @@ class FibreChannelConnectorTestCase(test_connector.ConnectorTestCase):
|
||||
connection_info['data'])
|
||||
|
||||
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path')
|
||||
@mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_mpath_device')
|
||||
def _test_connect_volume_multipath(self, get_device_info_mock,
|
||||
get_scsi_wwn_mock,
|
||||
get_fc_hbas_info_mock,
|
||||
@ -301,6 +302,7 @@ class FibreChannelConnectorTestCase(test_connector.ConnectorTestCase):
|
||||
find_mp_dev_mock,
|
||||
access_mode,
|
||||
should_wait_for_rw,
|
||||
wait_mpath_device_mock,
|
||||
find_mp_device_path_mock):
|
||||
self.connector.use_multipath = True
|
||||
get_fc_hbas_mock.side_effect = self.fake_get_fc_hbas
|
||||
|
@ -1443,3 +1443,39 @@ loop0 0"""
|
||||
def test_lun_for_addressing_sam3_flat(self, original_lun, expected_lun):
|
||||
lun = self.linuxscsi.lun_for_addressing(original_lun, 'SAM3-flat')
|
||||
self.assertEqual(expected_lun, lun)
|
||||
|
||||
@mock.patch.object(linuxscsi, 'LOG')
|
||||
@mock.patch.object(linuxscsi.LinuxSCSI, '_execute')
|
||||
def test_wait_for_mpath_device(self, exec_mock, mock_log):
|
||||
exec_mock.return_value = (
|
||||
"3293379.070675 | /dev/dm-7: path sdb is usable",
|
||||
None,
|
||||
)
|
||||
mpath_name = 'dm-7'
|
||||
self.linuxscsi.wait_for_mpath_device(mpath_name)
|
||||
|
||||
self.assertEqual(1, exec_mock.call_count)
|
||||
exec_mock.assert_called_once_with(
|
||||
'multipath', '-C', mpath_name,
|
||||
attempts=4, interval=1,
|
||||
run_as_root=True,
|
||||
root_helper=self.linuxscsi._root_helper)
|
||||
mock_log.error.assert_not_called()
|
||||
|
||||
@mock.patch.object(linuxscsi, 'LOG')
|
||||
@mock.patch.object(linuxscsi.LinuxSCSI, '_execute')
|
||||
def test_wait_for_mpath_device_fails(self, exec_mock, mock_log):
|
||||
exec_mock.side_effect = putils.ProcessExecutionError
|
||||
|
||||
mpath_name = 'dm-7'
|
||||
exc = self.assertRaises(putils.ProcessExecutionError,
|
||||
self.linuxscsi.wait_for_mpath_device, 'dm-7')
|
||||
|
||||
exec_mock.assert_called_once_with(
|
||||
'multipath', '-C', mpath_name,
|
||||
attempts=4, interval=1,
|
||||
run_as_root=True,
|
||||
root_helper=self.linuxscsi._root_helper)
|
||||
mock_log.error.assert_called_once_with(
|
||||
"Failed to get mpath device %(mpath)s ready for "
|
||||
"I/O: %(except)s", {'mpath': mpath_name, 'except': exc})
|
||||
|
16
releasenotes/notes/wait-mpath-io-703605e74ee009ef.yaml
Normal file
16
releasenotes/notes/wait-mpath-io-703605e74ee009ef.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
`Bug #2067949 <https://bugs.launchpad.net/cinder/+bug/2067949>`_:
|
||||
Fixed issue where we try to write into a multipath device and fail
|
||||
since it is not ready for I/O. Now we wait until the I/O is likely
|
||||
to succeed.
|
||||
We introduced 2 new config options to make the wait time
|
||||
configurable:
|
||||
|
||||
* ``wait_mpath_device_attempts``
|
||||
* ``wait_mpath_device_interval``
|
||||
|
||||
These options defaults to 4 attempts and 1 second interval
|
||||
respectively. See help text of the config options for more
|
||||
information.
|
Loading…
Reference in New Issue
Block a user