[Pure Storage] Fix issue with loss of replicated array

When an array on either side of a replicated configuration loses
connectivity to Cinder, the driver can stop working.

This patch resolves the issue by validating that all arrays in a
replicated pair exist and raising a warning when one does not.

Closes-Bug: #1969784
Co-Authored-By: Keerthivasan <ksuresh@purestorage.com>

Change-Id: I3256875a33a30560b834b4c8a0c6bb1b5edf69fa
Simon Dodsley 2022-05-31 14:51:51 -04:00
parent dcec2f6f01
commit deeee2b363
3 changed files with 130 additions and 57 deletions
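
As an editorial illustration (not part of the commit), the following is a
minimal sketch of the guard pattern the patch applies when configuring
replication targets: each target is set up inside a try/except, and an
unreachable array is logged and skipped instead of aborting driver setup.
The helper name setup_secondary_arrays and the connect callable are
hypothetical.

import logging

LOG = logging.getLogger(__name__)


def setup_secondary_arrays(replication_devices, connect):
    """Hypothetical sketch: validate each replication target before use.

    ``connect`` is assumed to return a client for one target and to raise
    an exception when the array cannot be reached.  Unreachable targets
    are skipped with a warning rather than failing the whole setup.
    """
    targets = []
    for device in replication_devices:
        try:
            target = connect(device["san_ip"], device["api_token"])
        except Exception as err:  # the real driver catches purestorage.PureError
            LOG.warning("Failed to set up secondary array %s: %s",
                        device.get("backend_id"), err)
            continue
        targets.append(target)
    return targets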


@@ -747,6 +747,8 @@ class PureBaseSharedDriverTestCase(PureDriverTestCase):
super(PureBaseSharedDriverTestCase, self).setUp()
self.driver = pure.PureBaseVolumeDriver(configuration=self.mock_config)
self.driver._array = self.array
self.mock_object(self.driver, '_get_current_array',
return_value=self.array)
self.driver._replication_pod_name = 'cinder-pod'
self.driver._replication_pg_name = 'cinder-group'
self.purestorage_module.FlashArray.side_effect = None
@@ -820,6 +822,24 @@ class PureBaseSharedDriverTestCase(PureDriverTestCase):
return group_snap, group_snap_name
class PureBaseVolumeDriverGetCurrentArrayTestCase(PureDriverTestCase):
def setUp(self):
super(PureBaseVolumeDriverGetCurrentArrayTestCase, self).setUp()
self.driver = pure.PureBaseVolumeDriver(configuration=self.mock_config)
self.driver._array = self.array
self.driver._replication_pod_name = 'cinder-pod'
self.driver._replication_pg_name = 'cinder-group'
self.purestorage_module.FlashArray.side_effect = None
def test_get_current_array(self):
self.driver._is_active_cluster_enabled = True
self.array.array_id = '47966b2d-a1ed-4144-8cae-6332794562b8'
self.array.get_pod.return_value = CINDER_POD
self.driver._active_cluster_target_arrays = [self.array]
self.driver._get_current_array()
self.array.get_pod.assert_called_with('cinder-pod')
@ddt.ddt(testNameFormat=ddt.TestNameFormat.INDEX_ONLY)
class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
def _setup_mocks_for_replication(self):
@@ -983,6 +1003,15 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
retention = mock.MagicMock()
mock_generate_replication_retention.return_value = retention
self._setup_mocks_for_replication()
self.mock_config.safe_get.return_value = [
{
"backend_id": "foo",
"managed_backend_name": None,
"san_ip": "1.2.3.4",
"api_token": "abc123",
"type": "sync",
}
]
self.async_array2.get.return_value = GET_ARRAY_SECONDARY
self.array.get.return_value = GET_ARRAY_PRIMARY
self.purestorage_module.FlashArray.side_effect = [self.array,
@@ -990,12 +1019,9 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
self.driver._storage_protocol = 'iSCSI'
self.driver.do_setup(None)
self.assertEqual(self.array, self.driver._array)
self.assertEqual(1, len(self.driver._replication_target_arrays))
self.assertEqual(self.async_array2,
self.driver._replication_target_arrays[0])
calls = [
mock.call(self.array, [self.async_array2], 'cinder-group',
REPLICATION_INTERVAL_IN_SEC, retention)
3600, retention)
]
mock_setup_repl_pgroups.assert_has_calls(calls)
@@ -1028,11 +1054,6 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
self.driver._storage_protocol = 'iSCSI'
self.driver.do_setup(None)
self.assertEqual(self.array, self.driver._array)
mock_setup_repl_pgroups.assert_has_calls([
mock.call(self.array, [mock_sync_target], 'cinder-group',
REPLICATION_INTERVAL_IN_SEC, retention),
])
mock_setup_pods.assert_has_calls([
mock.call(self.array, [mock_sync_target], 'cinder-pod')
])
@@ -1748,6 +1769,7 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
model_update = self.driver.create_consistencygroup(None, cgroup)
expected_name = "consisgroup-" + cgroup.id + "-cinder"
self.driver._get_current_array.assert_called()
self.array.create_pgroup.assert_called_with(expected_name)
self.assertEqual({'status': 'available'}, model_update)
@@ -3403,6 +3425,8 @@ class PureISCSIDriverTestCase(PureBaseSharedDriverTestCase):
self.mock_config.use_chap_auth = False
self.driver = pure.PureISCSIDriver(configuration=self.mock_config)
self.driver._array = self.array
self.mock_object(self.driver, '_get_current_array',
return_value=self.array)
self.driver._storage_protocol = 'iSCSI'
self.mock_utils = mock.Mock()
self.driver.driver_utils = self.mock_utils
@@ -3905,6 +3929,8 @@ class PureFCDriverTestCase(PureBaseSharedDriverTestCase):
self.driver = pure.PureFCDriver(configuration=self.mock_config)
self.driver._storage_protocol = "FC"
self.driver._array = self.array
self.mock_object(self.driver, '_get_current_array',
return_value=self.array)
self.driver._lookup_service = mock.Mock()
def test_get_host(self):
@@ -4445,6 +4471,8 @@ class PureNVMEDriverTestCase(PureBaseSharedDriverTestCase):
super(PureNVMEDriverTestCase, self).setUp()
self.driver = pure.PureNVMEDriver(configuration=self.mock_config)
self.driver._array = self.array
self.mock_object(self.driver, '_get_current_array',
return_value=self.array)
self.driver._storage_protocol = 'NVMe-RoCE'
self.mock_utils = mock.Mock()
self.driver.transport_type = "rdma"
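
A hedged note on the test changes above: because _get_current_array can now
query pod state, the shared setUp methods stub it out to return the mock
array directly. Below is a minimal sketch of the same pattern using plain
unittest.mock; the driver and backing_array objects are placeholders, not
the real test fixtures.

from unittest import mock

driver = mock.Mock()          # placeholder for a driver instance
backing_array = mock.Mock()   # placeholder for a FlashArray client

# Equivalent in spirit to self.mock_object(self.driver,
# '_get_current_array', return_value=self.array) in the setUp methods above.
with mock.patch.object(driver, "_get_current_array",
                       return_value=backing_array):
    assert driver._get_current_array() is backing_array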


@@ -285,27 +285,32 @@ class PureBaseVolumeDriver(san.SanDriver):
uniform = strutils.bool_from_string(
replication_device.get("uniform", False))
target_array = self._get_flasharray(
san_ip,
api_token,
verify_https=verify_https,
ssl_cert_path=ssl_cert_path
)
try:
target_array = self._get_flasharray(
san_ip,
api_token,
verify_https=verify_https,
ssl_cert_path=ssl_cert_path
)
target_array_info = target_array.get()
target_array.array_name = target_array_info["array_name"]
target_array.array_id = target_array_info["id"]
target_array.replication_type = repl_type
target_array.backend_id = backend_id
target_array.uniform = uniform
target_array_info = target_array.get()
target_array.array_name = target_array_info["array_name"]
target_array.array_id = target_array_info["id"]
target_array.replication_type = repl_type
target_array.backend_id = backend_id
target_array.uniform = uniform
LOG.info("Added secondary array: backend_id='%s', name='%s',"
" id='%s', type='%s', uniform='%s'",
target_array.backend_id,
target_array.array_name,
target_array.array_id,
target_array.replication_type,
target_array.uniform)
LOG.info("Added secondary array: backend_id='%s',"
" name='%s', id='%s', type='%s', uniform='%s'",
target_array.backend_id,
target_array.array_name,
target_array.array_id,
target_array.replication_type,
target_array.uniform)
except purestorage.PureError as err:
LOG.warning("Failed to set up secondary array with "
"message: %(msg)s", {"msg": err.reason})
continue
self._replication_target_arrays.append(target_array)
if repl_type == REPLICATION_TYPE_SYNC:
@@ -365,38 +370,44 @@ class PureBaseVolumeDriver(san.SanDriver):
# Raises PureDriverException if unable to connect and PureHTTPError
# if unable to authenticate.
self._array = self._get_flasharray(
self.configuration.san_ip,
api_token=self.configuration.pure_api_token,
verify_https=self.configuration.driver_ssl_cert_verify,
ssl_cert_path=self.configuration.driver_ssl_cert_path
)
try:
self._array = self._get_flasharray(
self.configuration.san_ip,
api_token=self.configuration.pure_api_token,
verify_https=self.configuration.driver_ssl_cert_verify,
ssl_cert_path=self.configuration.driver_ssl_cert_path
)
array_info = self._array.get()
if version.parse(array_info["version"]) < version.parse(
'5.3.0'
):
msg = _("FlashArray Purity version less than 5.3.0 unsupported."
" Please upgrade your backend to a supported version.")
raise PureDriverException(msg)
array_info = self._array.get()
if version.parse(array_info["version"]) < version.parse(
'5.3.0'
):
msg = _("FlashArray Purity version less than 5.3.0 "
"unsupported. Please upgrade your backend to "
"a supported version.")
raise PureDriverException(msg)
self._array.array_name = array_info["array_name"]
self._array.array_id = array_info["id"]
self._array.replication_type = None
self._array.backend_id = self._backend_name
self._array.preferred = True
self._array.uniform = True
self._array.array_name = array_info["array_name"]
self._array.array_id = array_info["id"]
self._array.replication_type = None
self._array.backend_id = self._backend_name
self._array.preferred = True
self._array.uniform = True
LOG.info("Primary array: backend_id='%s', name='%s', id='%s'",
self.configuration.config_group,
self._array.array_name,
self._array.array_id)
LOG.info("Primary array: backend_id='%s', name='%s', id='%s'",
self.configuration.config_group,
self._array.array_name,
self._array.array_id)
except purestorage.PureError as err:
LOG.warning("self.do_setup failed to set up primary array with"
" message: %(msg)s", {"msg": err.reason})
self.do_setup_replication()
# If we have failed over at some point we need to adjust our current
# array based on the one that we have failed over to
if (self._active_backend_id is not None and
if (self._array is not None and
self._active_backend_id is not None and
self._active_backend_id != self._array.backend_id):
for secondary_array in self._replication_target_arrays:
if secondary_array.backend_id == self._active_backend_id:
@@ -415,7 +426,7 @@ class PureBaseVolumeDriver(san.SanDriver):
# Only set this up on sync rep arrays
self._setup_replicated_pods(
self._get_current_array(),
self._get_current_array(True),
self._active_cluster_target_arrays,
self._replication_pod_name
)
@@ -423,7 +434,7 @@ class PureBaseVolumeDriver(san.SanDriver):
# Even if the array is configured for sync rep set it
# up to handle async too
self._setup_replicated_pgroups(
self._get_current_array(),
self._get_current_array(True),
self._replication_target_arrays,
self._replication_pg_name,
self._replication_interval,
@@ -551,7 +562,7 @@ class PureBaseVolumeDriver(san.SanDriver):
except purestorage.PureError as err:
with excutils.save_and_reraise_exception():
LOG.error("Failed to add volume %s to pgroup, removing volume",
err)
err.reason)
array.destroy_volume(purity_vol_name)
array.eradicate_volume(purity_vol_name)
@@ -2527,7 +2538,29 @@ class PureBaseVolumeDriver(san.SanDriver):
wwn = '3624a9370' + volume_info['serial']
return wwn.lower()
def _get_current_array(self):
def _get_current_array(self, init=False):
if not init and self._is_active_cluster_enabled:
for target_array in self._active_cluster_target_arrays:
try:
LOG.info("Checking target array %s...",
target_array.array_name)
status_ok = False
pod_info = target_array.get_pod(self._replication_pod_name)
for pod_array in pod_info['arrays']:
if pod_array['array_id'] == target_array.array_id:
if pod_array['status'] == 'online':
status_ok = True
break
if not status_ok:
LOG.warning("Target array is offline. Volume "
"replication in unknown state. Check "
"replication links and array state.")
except purestorage.PureError as err:
LOG.warning("self.get_pod failed with"
" message: %(msg)s", {"msg": err})
raise purestorage.PureError('No functional arrays '
'available')
return self._array
def _set_current_array(self, array):
@@ -2813,7 +2846,13 @@ class PureFCDriver(PureBaseVolumeDriver, driver.FibreChannelDriver):
target_luns = []
target_wwns = []
for array in target_arrays:
connection = self._connect(array, pure_vol_name, connector)
try:
connection = self._connect(array, pure_vol_name, connector)
except purestorage.PureError as err:
# Swallow any exception, just warn and continue
LOG.warning("self._connect failed with"
" message: %(msg)s", {"msg": err.reason})
continue
array_wwns = self._get_array_wwns(array)
for wwn in array_wwns:
target_wwns.append(wwn)
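
For context, here is a minimal sketch of the pod-membership check that the
new _get_current_array performs above: a pod member counts as healthy only
if its entry in the get_pod() response reports status 'online'. The
pod_member_online helper and the literal pod dict below are illustrative,
not part of the driver.

def pod_member_online(pod_info, array_id):
    """Return True if the array appears in the pod and is 'online'.

    ``pod_info`` is assumed to look like the FlashArray get_pod() response
    used in the diff: {'arrays': [{'array_id': ..., 'status': ...}, ...]}.
    """
    for pod_array in pod_info.get("arrays", []):
        if pod_array["array_id"] == array_id:
            return pod_array["status"] == "online"
    return False


# Example: an offline member should trigger the driver's warning path.
pod = {"arrays": [{"array_id": "47966b2d", "status": "offline"}]}
assert not pod_member_online(pod, "47966b2d")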


@@ -0,0 +1,6 @@
---
fixes:
- |
Pure Storage FlashArray driver `bug #1969784
<https://bugs.launchpad.net/cinder/+bug/1969784>`_: Fixed an issue where
array failover incorrectly handled the loss of an array due to network
issues.