From deeee2b363c4f6acf0e5cfaa6b2b04b6eb6c8362 Mon Sep 17 00:00:00 2001
From: Simon Dodsley
Date: Tue, 31 May 2022 14:51:51 -0400
Subject: [PATCH] [Pure Storage] Fix issue with loss of replicated array

When an array on either side of a replicated configuration loses
connectivity to Cinder, this can cause the driver to stop working.

This patch resolves that issue by validating that all arrays in a
replicated pair exist and raising a warning if that is not the case.

Closes Bug: #1969784
Co-Authored-By: Keerthivasan
Change-Id: I3256875a33a30560b834b4c8a0c6bb1b5edf69fa
---
 cinder/tests/unit/volume/drivers/test_pure.py |  46 ++++--
 cinder/volume/drivers/pure.py                 | 135 +++++++++++-------
 ...idate-replica-arrays-a76630cab9435770.yaml |   6 +
 3 files changed, 130 insertions(+), 57 deletions(-)
 create mode 100644 releasenotes/notes/pure-validate-replica-arrays-a76630cab9435770.yaml

diff --git a/cinder/tests/unit/volume/drivers/test_pure.py b/cinder/tests/unit/volume/drivers/test_pure.py
index 40fbc79f3c0..1b92acf42a6 100644
--- a/cinder/tests/unit/volume/drivers/test_pure.py
+++ b/cinder/tests/unit/volume/drivers/test_pure.py
@@ -747,6 +747,8 @@ class PureBaseSharedDriverTestCase(PureDriverTestCase):
         super(PureBaseSharedDriverTestCase, self).setUp()
         self.driver = pure.PureBaseVolumeDriver(configuration=self.mock_config)
         self.driver._array = self.array
+        self.mock_object(self.driver, '_get_current_array',
+                         return_value=self.array)
         self.driver._replication_pod_name = 'cinder-pod'
         self.driver._replication_pg_name = 'cinder-group'
         self.purestorage_module.FlashArray.side_effect = None
@@ -820,6 +822,24 @@ class PureBaseSharedDriverTestCase(PureDriverTestCase):
         return group_snap, group_snap_name


+class PureBaseVolumeDriverGetCurrentArrayTestCase(PureDriverTestCase):
+    def setUp(self):
+        super(PureBaseVolumeDriverGetCurrentArrayTestCase, self).setUp()
+        self.driver = pure.PureBaseVolumeDriver(configuration=self.mock_config)
+        self.driver._array = self.array
+        self.driver._replication_pod_name = 'cinder-pod'
+        self.driver._replication_pg_name = 'cinder-group'
+        self.purestorage_module.FlashArray.side_effect = None
+
+    def test_get_current_array(self):
+        self.driver._is_active_cluster_enabled = True
+        self.array.array_id = '47966b2d-a1ed-4144-8cae-6332794562b8'
+        self.array.get_pod.return_value = CINDER_POD
+        self.driver._active_cluster_target_arrays = [self.array]
+        self.driver._get_current_array()
+        self.array.get_pod.assert_called_with('cinder-pod')
+
+
 @ddt.ddt(testNameFormat=ddt.TestNameFormat.INDEX_ONLY)
 class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
     def _setup_mocks_for_replication(self):
@@ -983,6 +1003,15 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
         retention = mock.MagicMock()
         mock_generate_replication_retention.return_value = retention
         self._setup_mocks_for_replication()
+        self.mock_config.safe_get.return_value = [
+            {
+                "backend_id": "foo",
+                "managed_backend_name": None,
+                "san_ip": "1.2.3.4",
+                "api_token": "abc123",
+                "type": "sync",
+            }
+        ]
         self.async_array2.get.return_value = GET_ARRAY_SECONDARY
         self.array.get.return_value = GET_ARRAY_PRIMARY
         self.purestorage_module.FlashArray.side_effect = [self.array,
@@ -990,12 +1019,9 @@
         self.driver._storage_protocol = 'iSCSI'
         self.driver.do_setup(None)
         self.assertEqual(self.array, self.driver._array)
-        self.assertEqual(1, len(self.driver._replication_target_arrays))
-        self.assertEqual(self.async_array2,
-                         self.driver._replication_target_arrays[0])
         calls = [
            mock.call(self.array, [self.async_array2], 'cinder-group',
-                      REPLICATION_INTERVAL_IN_SEC, retention)
+                      3600, retention)
         ]
         mock_setup_repl_pgroups.assert_has_calls(calls)
@@ -1028,11 +1054,6 @@
         self.driver._storage_protocol = 'iSCSI'
         self.driver.do_setup(None)
         self.assertEqual(self.array, self.driver._array)
-
-        mock_setup_repl_pgroups.assert_has_calls([
-            mock.call(self.array, [mock_sync_target], 'cinder-group',
-                      REPLICATION_INTERVAL_IN_SEC, retention),
-        ])
         mock_setup_pods.assert_has_calls([
             mock.call(self.array, [mock_sync_target], 'cinder-pod')
         ])
@@ -1748,6 +1769,7 @@
         model_update = self.driver.create_consistencygroup(None, cgroup)
         expected_name = "consisgroup-" + cgroup.id + "-cinder"
+        self.driver._get_current_array.assert_called()
         self.array.create_pgroup.assert_called_with(expected_name)
         self.assertEqual({'status': 'available'}, model_update)
@@ -3403,6 +3425,8 @@ class PureISCSIDriverTestCase(PureBaseSharedDriverTestCase):
         self.mock_config.use_chap_auth = False
         self.driver = pure.PureISCSIDriver(configuration=self.mock_config)
         self.driver._array = self.array
+        self.mock_object(self.driver, '_get_current_array',
+                         return_value=self.array)
         self.driver._storage_protocol = 'iSCSI'
         self.mock_utils = mock.Mock()
         self.driver.driver_utils = self.mock_utils
@@ -3905,6 +3929,8 @@ class PureFCDriverTestCase(PureBaseSharedDriverTestCase):
         self.driver = pure.PureFCDriver(configuration=self.mock_config)
         self.driver._storage_protocol = "FC"
         self.driver._array = self.array
+        self.mock_object(self.driver, '_get_current_array',
+                         return_value=self.array)
         self.driver._lookup_service = mock.Mock()

     def test_get_host(self):
@@ -4445,6 +4471,8 @@ class PureNVMEDriverTestCase(PureBaseSharedDriverTestCase):
         super(PureNVMEDriverTestCase, self).setUp()
         self.driver = pure.PureNVMEDriver(configuration=self.mock_config)
         self.driver._array = self.array
+        self.mock_object(self.driver, '_get_current_array',
+                         return_value=self.array)
         self.driver._storage_protocol = 'NVMe-RoCE'
         self.mock_utils = mock.Mock()
         self.driver.transport_type = "rdma"
diff --git a/cinder/volume/drivers/pure.py b/cinder/volume/drivers/pure.py
index fce1fdeae10..116b3d26159 100644
--- a/cinder/volume/drivers/pure.py
+++ b/cinder/volume/drivers/pure.py
@@ -285,27 +285,32 @@ class PureBaseVolumeDriver(san.SanDriver):
             uniform = strutils.bool_from_string(
                 replication_device.get("uniform", False))

-            target_array = self._get_flasharray(
-                san_ip,
-                api_token,
-                verify_https=verify_https,
-                ssl_cert_path=ssl_cert_path
-            )
+            try:
+                target_array = self._get_flasharray(
+                    san_ip,
+                    api_token,
+                    verify_https=verify_https,
+                    ssl_cert_path=ssl_cert_path
+                )

-            target_array_info = target_array.get()
-            target_array.array_name = target_array_info["array_name"]
-            target_array.array_id = target_array_info["id"]
-            target_array.replication_type = repl_type
-            target_array.backend_id = backend_id
-            target_array.uniform = uniform
+                target_array_info = target_array.get()
+                target_array.array_name = target_array_info["array_name"]
+                target_array.array_id = target_array_info["id"]
+                target_array.replication_type = repl_type
+                target_array.backend_id = backend_id
+                target_array.uniform = uniform

-            LOG.info("Added secondary array: backend_id='%s', name='%s',"
-                     " id='%s', type='%s', uniform='%s'",
-                     target_array.backend_id,
-                     target_array.array_name,
-                     target_array.array_id,
-                     target_array.replication_type,
-                     target_array.uniform)
LOG.info("Added secondary array: backend_id='%s'," + " name='%s', id='%s', type='%s', uniform='%s'", + target_array.backend_id, + target_array.array_name, + target_array.array_id, + target_array.replication_type, + target_array.uniform) + except purestorage.PureError as err: + LOG.warning("Failed to set up secondary array with " + "message: %(msg)s", {"msg": err.reason}) + continue self._replication_target_arrays.append(target_array) if repl_type == REPLICATION_TYPE_SYNC: @@ -365,38 +370,44 @@ class PureBaseVolumeDriver(san.SanDriver): # Raises PureDriverException if unable to connect and PureHTTPError # if unable to authenticate. - self._array = self._get_flasharray( - self.configuration.san_ip, - api_token=self.configuration.pure_api_token, - verify_https=self.configuration.driver_ssl_cert_verify, - ssl_cert_path=self.configuration.driver_ssl_cert_path - ) + try: + self._array = self._get_flasharray( + self.configuration.san_ip, + api_token=self.configuration.pure_api_token, + verify_https=self.configuration.driver_ssl_cert_verify, + ssl_cert_path=self.configuration.driver_ssl_cert_path + ) - array_info = self._array.get() - if version.parse(array_info["version"]) < version.parse( - '5.3.0' - ): - msg = _("FlashArray Purity version less than 5.3.0 unsupported." - " Please upgrade your backend to a supported version.") - raise PureDriverException(msg) + array_info = self._array.get() + if version.parse(array_info["version"]) < version.parse( + '5.3.0' + ): + msg = _("FlashArray Purity version less than 5.3.0 " + "unsupported. Please upgrade your backend to " + "a supported version.") + raise PureDriverException(msg) - self._array.array_name = array_info["array_name"] - self._array.array_id = array_info["id"] - self._array.replication_type = None - self._array.backend_id = self._backend_name - self._array.preferred = True - self._array.uniform = True + self._array.array_name = array_info["array_name"] + self._array.array_id = array_info["id"] + self._array.replication_type = None + self._array.backend_id = self._backend_name + self._array.preferred = True + self._array.uniform = True - LOG.info("Primary array: backend_id='%s', name='%s', id='%s'", - self.configuration.config_group, - self._array.array_name, - self._array.array_id) + LOG.info("Primary array: backend_id='%s', name='%s', id='%s'", + self.configuration.config_group, + self._array.array_name, + self._array.array_id) + except purestorage.PureError as err: + LOG.warning("self.do_setup failed to set up primary array with" + " message: %(msg)s", {"msg": err.reason}) self.do_setup_replication() # If we have failed over at some point we need to adjust our current # array based on the one that we have failed over to - if (self._active_backend_id is not None and + if (self._array is not None and + self._active_backend_id is not None and self._active_backend_id != self._array.backend_id): for secondary_array in self._replication_target_arrays: if secondary_array.backend_id == self._active_backend_id: @@ -415,7 +426,7 @@ class PureBaseVolumeDriver(san.SanDriver): # Only set this up on sync rep arrays self._setup_replicated_pods( - self._get_current_array(), + self._get_current_array(True), self._active_cluster_target_arrays, self._replication_pod_name ) @@ -423,7 +434,7 @@ class PureBaseVolumeDriver(san.SanDriver): # Even if the array is configured for sync rep set it # up to handle async too self._setup_replicated_pgroups( - self._get_current_array(), + self._get_current_array(True), self._replication_target_arrays, 
                 self._replication_pg_name,
                 self._replication_interval,
@@ -551,7 +562,7 @@
         except purestorage.PureError as err:
             with excutils.save_and_reraise_exception():
                 LOG.error("Failed to add volume %s to pgroup, removing volume",
-                          err)
+                          err.reason)
                 array.destroy_volume(purity_vol_name)
                 array.eradicate_volume(purity_vol_name)
@@ -2527,7 +2538,29 @@
         wwn = '3624a9370' + volume_info['serial']
         return wwn.lower()

-    def _get_current_array(self):
+    def _get_current_array(self, init=False):
+        if not init and self._is_active_cluster_enabled:
+            for target_array in self._active_cluster_target_arrays:
+                try:
+                    LOG.info("Checking target array %s...",
+                             target_array.array_name)
+                    status_ok = False
+                    pod_info = target_array.get_pod(self._replication_pod_name)
+                    for pod_array in pod_info['arrays']:
+                        if pod_array['array_id'] == target_array.array_id:
+                            if pod_array['status'] == 'online':
+                                status_ok = True
+                            break
+                    if not status_ok:
+                        LOG.warning("Target array is offline. Volume "
+                                    "replication in unknown state. Check "
+                                    "replication links and array state.")
+                except purestorage.PureError as err:
+                    LOG.warning("self.get_pod failed with"
+                                " message: %(msg)s", {"msg": err})
+                    raise purestorage.PureError('No functional arrays '
+                                                'available')
+
         return self._array

     def _set_current_array(self, array):
@@ -2813,7 +2846,13 @@ class PureFCDriver(PureBaseVolumeDriver, driver.FibreChannelDriver):
         target_luns = []
         target_wwns = []
         for array in target_arrays:
-            connection = self._connect(array, pure_vol_name, connector)
+            try:
+                connection = self._connect(array, pure_vol_name, connector)
+            except purestorage.PureError as err:
+                # Swallow any exception, just warn and continue
+                LOG.warning("self._connect failed with"
+                            " message: %(msg)s", {"msg": err.reason})
+                continue
             array_wwns = self._get_array_wwns(array)
             for wwn in array_wwns:
                 target_wwns.append(wwn)
diff --git a/releasenotes/notes/pure-validate-replica-arrays-a76630cab9435770.yaml b/releasenotes/notes/pure-validate-replica-arrays-a76630cab9435770.yaml
new file mode 100644
index 00000000000..20e42dfc073
--- /dev/null
+++ b/releasenotes/notes/pure-validate-replica-arrays-a76630cab9435770.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Pure Storage FlashArray driver `bug #1969784
+    <https://bugs.launchpad.net/cinder/+bug/1969784>`_: Fixed array failover
+    to correctly handle the loss of an array due to network issues.
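
The following is a minimal standalone sketch of the pod health check that the
new _get_current_array() performs, included only to illustrate the logic in
isolation. It reuses the purestorage client calls and pod fields the patch
itself relies on (get_pod, 'arrays', 'array_id', 'status'); the helper name
validate_pod_members and the free-standing structure are illustrative
assumptions, not driver code.

    import logging

    import purestorage

    LOG = logging.getLogger(__name__)


    def validate_pod_members(target_arrays, pod_name):
        # Warn when an ActiveCluster member array is not online in the pod;
        # raise if a member cannot be queried at all, mirroring the patch.
        # Assumes each client already has array_id/array_name set, as the
        # driver does during setup.
        for target_array in target_arrays:
            try:
                pod_info = target_array.get_pod(pod_name)
            except purestorage.PureError as err:
                LOG.warning("get_pod failed with message: %(msg)s",
                            {"msg": err})
                raise purestorage.PureError('No functional arrays available')
            status_ok = False
            for pod_array in pod_info['arrays']:
                if pod_array['array_id'] == target_array.array_id:
                    status_ok = pod_array['status'] == 'online'
                    break
            if not status_ok:
                LOG.warning("Target array %s is offline. Volume replication "
                            "in unknown state. Check replication links and "
                            "array state.", target_array.array_name)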