Retry executing command "nvme list" when fail

Only one NVMe connection can be active on the same Compute node, when
initiator side has more than one connection then new connection is made,
results of the new connected device is not instantaneous.

Fixing this issue requires retry and sleep when executing command
"nvme list" to retrieve the newly connected device.

Change-Id: I6b70140be7023770b83603de723d6d2de3ebb747
Closes-Bug: #1792313
(cherry picked from commit fab367cc26)
This commit is contained in:
Hamdy Khader 2018-09-13 14:38:11 +03:00
parent e01e5db822
commit a8215ba452
2 changed files with 39 additions and 33 deletions

View File

@ -18,12 +18,12 @@ from oslo_concurrency import processutils as putils
from oslo_log import log as logging
from os_brick import exception
from os_brick import initiator
from os_brick.i18n import _
from os_brick.initiator.connectors import base
from os_brick import utils
DEVICE_SCAN_ATTEMPTS_DEFAULT = 3
DEVICE_SCAN_ATTEMPTS_DEFAULT = 5
LOG = logging.getLogger(__name__)
@ -35,7 +35,7 @@ class NVMeConnector(base.BaseLinuxConnector):
"""Connector class to attach/detach NVMe over fabric volumes."""
def __init__(self, root_helper, driver=None,
device_scan_attempts=initiator.DEVICE_SCAN_ATTEMPTS_DEFAULT,
device_scan_attempts=DEVICE_SCAN_ATTEMPTS_DEFAULT,
*args, **kwargs):
super(NVMeConnector, self).__init__(
root_helper,
@ -60,21 +60,28 @@ class NVMeConnector(base.BaseLinuxConnector):
nvme_devices = []
pattern = r'/dev/nvme[0-9]n[0-9]'
cmd = ['nvme', 'list']
try:
(out, err) = self._execute(*cmd, root_helper=self._root_helper,
run_as_root=True)
for line in out.split('\n'):
result = re.match(pattern, line)
if result:
nvme_devices.append(result.group(0))
LOG.debug("_get_nvme_devices returned %(nvme_devices)s",
{'nvme_devices': nvme_devices})
return nvme_devices
for retry in range(1, self.device_scan_attempts + 1):
try:
(out, err) = self._execute(*cmd,
root_helper=self._root_helper,
run_as_root=True)
for line in out.split('\n'):
result = re.match(pattern, line)
if result:
nvme_devices.append(result.group(0))
LOG.debug("_get_nvme_devices returned %(nvme_devices)s",
{'nvme_devices': nvme_devices})
return nvme_devices
except putils.ProcessExecutionError:
LOG.error(
"Failed to list available NVMe connected controllers.")
raise
except putils.ProcessExecutionError:
LOG.warning(
"Failed to list available NVMe connected controllers, "
"retrying.")
time.sleep(retry ** 2)
else:
msg = _("Failed to retrieve available connected NVMe controllers "
"when running nvme list.")
raise exception.CommandExecutionFailed(message=msg)
@utils.trace
@synchronized('connect_volume')
@ -113,18 +120,12 @@ class NVMeConnector(base.BaseLinuxConnector):
"%(conn_nqn)s", {'conn_nqn': conn_nqn})
raise
for retry in range(1, self.device_scan_attempts + 1):
all_nvme_devices = self._get_nvme_devices()
path = set(all_nvme_devices) - set(current_nvme_devices)
if path:
break
time.sleep(retry ** 2)
else:
LOG.error("Failed to retrieve available connected NVMe "
"controllers when running nvme list")
all_nvme_devices = self._get_nvme_devices()
path = set(all_nvme_devices) - set(current_nvme_devices)
path = list(path)
if not path:
raise exception.TargetPortalNotFound(target_portal)
path = list(path)
LOG.debug("all_nvme_devices are %(all_nvme_devices)s",
{'all_nvme_devices': all_nvme_devices})
device_info['path'] = path[0]

View File

@ -48,9 +48,10 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase):
self.assertEqual(expected, actual)
@mock.patch.object(nvme.NVMeConnector, '_execute')
def test_get_nvme_devices_raise(self, mock_execute):
@mock.patch('time.sleep')
def test_get_nvme_devices_raise(self, mock_sleep, mock_execute):
mock_execute.side_effect = putils.ProcessExecutionError
self.assertRaises(putils.ProcessExecutionError,
self.assertRaises(exception.CommandExecutionFailed,
self.connector._get_nvme_devices)
@mock.patch.object(nvme.NVMeConnector, '_get_nvme_devices')
@ -83,14 +84,15 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase):
run_as_root=True)
@mock.patch.object(nvme.NVMeConnector, '_execute')
def test_connect_volume_raise(self, mock_execute):
@mock.patch('time.sleep')
def test_connect_volume_raise(self, mock_sleep, mock_execute):
connection_properties = {'target_portal': 'portal',
'target_port': 1,
'nqn': 'nqn.volume_123',
'device_path': '',
'transport_type': 'rdma'}
mock_execute.side_effect = putils.ProcessExecutionError
self.assertRaises(putils.ProcessExecutionError,
self.assertRaises(exception.CommandExecutionFailed,
self.connector.connect_volume,
connection_properties)
@ -113,7 +115,8 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase):
@mock.patch.object(nvme.NVMeConnector, '_get_nvme_devices')
@mock.patch.object(nvme.NVMeConnector, '_execute')
def test_disconnect_volume(self, mock_devices, mock_execute):
@mock.patch('time.sleep')
def test_disconnect_volume(self, mock_sleep, mock_execute, mock_devices):
connection_properties = {'target_portal': 'portal',
'target_port': 1,
'nqn': 'nqn.volume_123',
@ -130,7 +133,9 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase):
@mock.patch.object(nvme.NVMeConnector, '_get_nvme_devices')
@mock.patch.object(nvme.NVMeConnector, '_execute')
def test_disconnect_volume_raise(self, mock_devices, mock_execute):
@mock.patch('time.sleep')
def test_disconnect_volume_raise(
self, mock_sleep, mock_execute, mock_devices):
mock_execute.side_effect = putils.ProcessExecutionError
mock_devices.return_value = '/dev/nvme0n1'
connection_properties = {'target_portal': 'portal',