multipath/iscsi: iSCSI connections are not reinitiated after reboot

After a compute host reboot in an iSCSI/multipath environment, some
of the connections to the iSCSI portal are not reinitiated, and missing
iSCSI devices are observed. This patchset introduces retries for this
particular scenario.

Closes-Bug: #1944474
Change-Id: I60ee7421f7b792e8324286908a9fdd8fb53e433e
(cherry picked from commit 8832c53899)
(cherry picked from commit 4d116483af)
This commit is contained in:
Sophie Huang 2021-09-21 23:05:44 +00:00 committed by Simon Dodsley
parent 0c7f5fcfe5
commit 779d1e48c7
3 changed files with 59 additions and 3 deletions

View File

@ -1040,6 +1040,16 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
return ips, iqns
def _connect_to_iscsi_portal(self, connection_properties):
    """Connect to the iSCSI portal-target under a lock; return the session id.

    The lock name is built from the first comma-separated entry of
    ``connection_properties['target_portal']`` and from
    ``connection_properties['target_iqn']``.  The actual work is delegated
    to ``_connect_to_iscsi_portal_unsafe``, whose result is returned as is.
    """
    portal = connection_properties['target_portal'].split(",")[0]
    target_iqn = connection_properties['target_iqn']
    lock_name = f'connect_to_iscsi_portal-{portal}-{target_iqn}'
    # Build the lock wrapper per call because its name depends on the
    # portal/IQN pair; presumably `synchronized` is a named-lock decorator
    # factory -- confirm against its definition.
    serialize = synchronized(lock_name)
    guarded_connect = serialize(self._connect_to_iscsi_portal_unsafe)
    return guarded_connect(connection_properties)
@utils.retry((exception.BrickException))
def _connect_to_iscsi_portal_unsafe(self, connection_properties):
"""Connect to an iSCSI portal-target an return the session id."""
portal = connection_properties['target_portal'].split(",")[0]
target_iqn = connection_properties['target_iqn']
@ -1055,9 +1065,17 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
out, err = self._run_iscsiadm(connection_properties, (),
check_exit_code=(0, 21, 255))
if err:
self._run_iscsiadm(connection_properties,
('--interface', self._get_transport(),
'--op', 'new'))
out_new, err_new = self._run_iscsiadm(connection_properties,
('--interface',
self._get_transport(),
'--op', 'new'),
check_exit_code=(0, 6))
if err_new:
# retry if iscsiadm returns 6 for "database failure"
LOG.debug("Retrying to connect to iSCSI portal %s", portal)
msg = (_("Encountered database failure for %s.") % (portal))
raise exception.BrickException(msg=msg)
# Try to set the scan mode to manual
res = self._iscsiadm_update(connection_properties,
'node.session.scan', 'manual',

View File

@ -1113,6 +1113,38 @@ Setting up iSCSI targets: unused
self.assertListEqual(expected_cmds, actual_cmds)
get_sessions_mock.assert_called_once_with()
# Scenario: every "--op new" call reports a non-zero value, so
# _connect_to_iscsi_portal is expected to end in BrickException after the
# command sequence listed in expected_cmds.
@mock.patch.object(iscsi.ISCSIConnector, '_iscsiadm_update')
@mock.patch.object(iscsi.ISCSIConnector, '_get_transport',
return_value='default')
@mock.patch.object(iscsi.ISCSIConnector, '_get_iscsi_sessions_full')
@mock.patch('os_brick.utils._time_sleep')
def test_connect_to_iscsi_portal_fail_op_new(self, sleep_mock,
get_sessions_mock,
get_transport_mock,
iscsiadm_update_mock):
# Mock arguments are injected bottom-up relative to the decorator stack:
# the patch nearest the def (_time_sleep) becomes sleep_mock.
get_sessions_mock.return_value = []
with mock.patch.object(self.connector, '_execute') as exec_mock:
# Six canned _execute results consumed in order: a node lookup result
# followed by an "--op new" result, three times over.
# NOTE(review): assumes the second tuple element is what the connector
# tests as `err` -- confirm against _run_iscsiadm.
exec_mock.side_effect = [('', 21), ('', 6), ('', 21), ('', 6),
('', 21), ('', 6)]
self.assertRaises(exception.BrickException,
self.connector._connect_to_iscsi_portal,
self.CON_PROPS)
# The lookup / "--op new" command pair must have been issued three times.
expected_cmds = ['iscsiadm -m node -T tgt1 -p ip1:port1',
'iscsiadm -m node -T tgt1 -p ip1:port1 '
'--interface default --op new',
'iscsiadm -m node -T tgt1 -p ip1:port1',
'iscsiadm -m node -T tgt1 -p ip1:port1 '
'--interface default --op new',
'iscsiadm -m node -T tgt1 -p ip1:port1',
'iscsiadm -m node -T tgt1 -p ip1:port1 '
'--interface default --op new']
# Rebuild each recorded command line from the positional args of the call.
actual_cmds = [' '.join(args[0]) for args in exec_mock.call_args_list]
self.assertListEqual(expected_cmds, actual_cmds)
# The node.session.scan update must never be reached on this failure path.
iscsiadm_update_mock.assert_not_called()
# Called twice by the retry mechanism
self.assertEqual(2, sleep_mock.call_count)
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_sysfs_wwn',
side_effect=(None, 'tgt2'))
@mock.patch.object(iscsi.ISCSIConnector, '_connect_vol')

View File

@ -0,0 +1,6 @@
---
fixes:
- |
`Bug #1944474 <https://bugs.launchpad.net/os-brick/+bug/1944474>`_: Fixed
iSCSI connections not being reinitiated under high connection concurrency
with multipath enabled by adding the missing retries.