From 44aea00c70788cff034fa18d5e119b17923a657e Mon Sep 17 00:00:00 2001 From: michael-mcaleer Date: Thu, 24 May 2018 14:32:24 +0100 Subject: [PATCH] VMAX Driver - Failover Unisphere Support VMAX support for failing over to backup instances of Unisphere. Change-Id: I72354ace0de850e162c5ce804503a62f7161c07f Implements: blueprint vmax-unisphere-failover --- .../volume/drivers/dell_emc/vmax/test_vmax.py | 182 ++++++++++++++++- cinder/volume/drivers/dell_emc/vmax/common.py | 112 +++++++++- cinder/volume/drivers/dell_emc/vmax/fc.py | 2 + cinder/volume/drivers/dell_emc/vmax/iscsi.py | 2 + cinder/volume/drivers/dell_emc/vmax/rest.py | 191 +++++++++++++++--- cinder/volume/drivers/dell_emc/vmax/utils.py | 5 + ...x-failover-unisphere-2de78d1f76b5f836.yaml | 4 + 7 files changed, 464 insertions(+), 34 deletions(-) create mode 100644 releasenotes/notes/vmax-failover-unisphere-2de78d1f76b5f836.yaml diff --git a/cinder/tests/unit/volume/drivers/dell_emc/vmax/test_vmax.py b/cinder/tests/unit/volume/drivers/dell_emc/vmax/test_vmax.py index 9cdf6659e99..5877e5ff713 100644 --- a/cinder/tests/unit/volume/drivers/dell_emc/vmax/test_vmax.py +++ b/cinder/tests/unit/volume/drivers/dell_emc/vmax/test_vmax.py @@ -984,6 +984,43 @@ class VMAXCommonData(object): 'serial_number': array, 'vmax_model': vmax_model} + u4p_failover_config = { + 'u4p_failover_backoff_factor': '2', + 'u4p_failover_retries': '3', + 'u4p_failover_timeout': '10', + 'u4p_primary': '10.10.10.10', + 'u4p_failover_autofailback': 'True', + 'u4p_failover_targets': [ + {'san_ip': '10.10.10.11', + 'san_api_port': '8443', + 'san_login': 'test', + 'san_password': 'test', + 'driver_ssl_cert_verify': '/path/to/cert', + 'driver_ssl_cert_path': 'True'}, + {'san_ip': '10.10.10.12', + 'san_api_port': '8443', + 'san_login': 'test', + 'san_password': 'test', + 'driver_ssl_cert_verify': 'True'}, + {'san_ip': '10.10.10.11', + 'san_api_port': '8443', + 'san_login': 'test', + 'san_password': 'test', + 'driver_ssl_cert_verify': '/path/to/cert', + 'driver_ssl_cert_path': 'False'}]} + + u4p_failover_target = [{ + 'RestServerIp': '10.10.10.11', + 'RestServerPort': '8443', + 'RestUserName': 'test', + 'RestPassword': 'test', + 'SSLVerify': '/path/to/cert'}, + {'RestServerIp': '10.10.10.12', + 'RestServerPort': '8443', + 'RestUserName': 'test', + 'RestPassword': 'test', + 'SSLVerify': 'True'}] + class FakeLookupService(object): def get_device_mapping_from_network(self, initiator_wwns, target_wwns): @@ -1002,6 +1039,15 @@ class FakeResponse(object): else: raise ValueError + def status_code(self): + return self.status_code() + + def raise_for_status(self): + if 200 <= self.status_code <= 204: + return False + else: + return True + class FakeRequestsSession(object): @@ -1026,6 +1072,18 @@ class FakeRequestsSession(object): elif method == 'EXCEPTION': raise Exception + elif method == 'CONNECTION': + raise requests.ConnectionError + + elif method == 'HTTP': + raise requests.HTTPError + + elif method == 'SSL': + raise requests.exceptions.SSLError + + elif method == 'EXCEPTION': + raise exception.VolumeBackendAPIException + return FakeResponse(status_code, return_object) def _get_request(self, url, params): @@ -1207,6 +1265,9 @@ class FakeRequestsSession(object): def session(self): return FakeRequestsSession() + def close(self): + pass + class FakeConfiguration(object): @@ -1218,7 +1279,6 @@ class FakeConfiguration(object): self.volume_backend_name = volume_backend_name self.config_group = volume_backend_name self.san_is_local = False - self.max_over_subscription_ratio = 1 if replication_device: self.replication_device = [replication_device] for key, value in kwargs.items(): @@ -1252,6 +1312,16 @@ class FakeConfiguration(object): self.driver_ssl_cert_verify = value elif key == 'driver_ssl_cert_path': self.driver_ssl_cert_path = value + elif key == 'u4p_failover_target': + self.u4p_failover_target = value + elif key == 'u4p_failover_backoff_factor': + self.u4p_failover_backoff_factor = value + elif key == 'u4p_failover_retries': + self.u4p_failover_retries = value + elif key == 'u4p_failover_timeout': + self.u4p_failover_timeout = value + elif key == 'u4p_primary': + self.u4p_primary = value def safe_get(self, key): try: @@ -1747,13 +1817,43 @@ class VMAXRestTest(test.TestCase): self.rest = self.common.rest self.utils = self.common.utils - def test_rest_request_exception(self): - sc, msg = self.rest.request('/fake_url', 'TIMEOUT') - self.assertIsNone(sc) - self.assertIsNone(msg) + def test_rest_request_no_response(self): + with mock.patch.object(self.rest.session, 'request', + return_value=FakeResponse(None, None)): + sc, msg = self.rest.request('TIMEOUT', '/fake_url') + self.assertIsNone(sc) + self.assertIsNone(msg) + + def test_rest_request_timeout_exception(self): + self.assertRaises(requests.exceptions.Timeout, + self.rest.request, '', 'TIMEOUT') + + def test_rest_request_connection_exception(self): + self.assertRaises(requests.exceptions.ConnectionError, + self.rest.request, '', 'CONNECTION') + + def test_rest_request_http_exception(self): + self.assertRaises(requests.exceptions.HTTPError, + self.rest.request, '', 'HTTP') + + def test_rest_request_ssl_exception(self): + self.assertRaises(requests.exceptions.SSLError, + self.rest.request, '', 'SSL') + + def test_rest_request_undefined_exception(self): self.assertRaises(exception.VolumeBackendAPIException, self.rest.request, '', 'EXCEPTION') + def test_rest_request_handle_failover(self): + response = FakeResponse(200, 'Success') + with mock.patch.object(self.rest, '_handle_u4p_failover'): + with mock.patch.object(self.rest.session, 'request', + side_effect=[requests.ConnectionError, + response]): + self.rest.u4p_failover_enabled = True + self.rest.request('/fake_uri', 'GET') + self.rest._handle_u4p_failover.assert_called_once() + def test_wait_for_job_complete(self): rc, job, status, task = self.rest.wait_for_job_complete( {'status': 'created', 'jobId': '12345'}, self.data.extra_specs) @@ -3292,6 +3392,35 @@ class VMAXRestTest(test.TestCase): self.assertEqual(self.rest.get_vmax_model(self.data.array), reference) + def test_set_u4p_failover_config(self): + self.rest.set_u4p_failover_config(self.data.u4p_failover_config) + + self.assertTrue(self.rest.u4p_failover_enabled) + self.assertEqual('3', self.rest.u4p_failover_retries) + self.assertEqual('10', self.rest.u4p_failover_timeout) + self.assertEqual('2', self.rest.u4p_failover_backoff_factor) + self.assertEqual('10.10.10.10', self.rest.primary_u4p) + self.assertEqual('10.10.10.11', + self.rest.u4p_failover_targets[0]['san_ip']) + self.assertEqual('10.10.10.12', + self.rest.u4p_failover_targets[1]['san_ip']) + + def test_handle_u4p_failover_with_targets(self): + self.rest.u4p_failover_targets = self.data.u4p_failover_target + self.rest._handle_u4p_failover() + + self.assertTrue(self.rest.u4p_in_failover) + self.assertEqual('test', self.rest.user) + self.assertEqual('test', self.rest.passwd) + self.assertEqual('/path/to/cert', self.rest.verify) + self.assertEqual('https://10.10.10.11:8443/univmax/restapi', + self.rest.base_uri) + + def test_handle_u4p_failover_no_targets_exception(self): + self.rest.u4p_failover_targets = [] + self.assertRaises(exception.VolumeBackendAPIException, + self.rest._handle_u4p_failover) + class VMAXProvisionTest(test.TestCase): def setUp(self): @@ -6019,6 +6148,48 @@ class VMAXCommonTest(test.TestCase): response4 = self.common.get_attributes_from_cinder_config() self.assertEqual(expected_response, response4) + def test_get_u4p_failover_info(self): + configuration = FakeConfiguration( + None, 'CommonTests', 1, 1, + san_ip='1.1.1.1', + san_login='test', + san_password='test', + san_api_port=8443, + driver_ssl_cert_verify='/path/to/cert', + u4p_failover_target=(self.data.u4p_failover_config[ + 'u4p_failover_targets']), + u4p_failover_backoff_factor='2', + u4p_failover_retries='3', + u4p_failover_timeout='10', + u4p_primary='10.10.10.10' + ) + self.common.configuration = configuration + self.common._get_u4p_failover_info() + self.assertTrue(self.rest.u4p_failover_enabled) + self.assertIsNotNone(self.rest.u4p_failover_targets) + + def test_update_vol_stats_retest_u4p(self): + self.rest.u4p_in_failover = True + self.rest.u4p_failover_autofailback = True + with mock.patch.object(self.common, 'retest_primary_u4p'): + self.common.update_volume_stats() + self.common.retest_primary_u4p.assert_called_once() + + self.rest.u4p_in_failover = True + self.rest.u4p_failover_autofailback = False + with mock.patch.object(self.common, 'retest_primary_u4p'): + self.common.update_volume_stats() + self.common.retest_primary_u4p.assert_not_called() + + @mock.patch.object(rest.VMAXRest, 'request', + return_value=[200, None]) + @mock.patch.object(common.VMAXCommon, + 'get_attributes_from_cinder_config', + return_value=VMAXCommonData.u4p_failover_target[0]) + def test_retest_primary_u4p(self, mock_primary_u4p, mock_request): + self.common.retest_primary_u4p() + self.assertFalse(self.rest.u4p_in_failover) + class VMAXFCTest(test.TestCase): def setUp(self): @@ -6596,6 +6767,7 @@ class VMAXMaskingTest(test.TestCase): configuration.safe_get.return_value = 'MaskingTests' configuration.config_group = 'MaskingTests' self._gather_info = common.VMAXCommon._gather_info + common.VMAXCommon._get_u4p_failover_info = mock.Mock() common.VMAXCommon._gather_info = mock.Mock() rest.VMAXRest._establish_rest_session = mock.Mock( return_value=FakeRequestsSession()) diff --git a/cinder/volume/drivers/dell_emc/vmax/common.py b/cinder/volume/drivers/dell_emc/vmax/common.py index bd50cfa8f12..b52651fd581 100644 --- a/cinder/volume/drivers/dell_emc/vmax/common.py +++ b/cinder/volume/drivers/dell_emc/vmax/common.py @@ -21,6 +21,7 @@ import sys import time from oslo_config import cfg +from oslo_config import types from oslo_log import log as logging from oslo_utils import strutils import six @@ -94,7 +95,32 @@ vmax_opts = [ cfg.ListOpt(utils.VMAX_PORT_GROUPS, bounds=True, help='List of port groups containing frontend ports ' - 'configured prior for server connection.')] + 'configured prior for server connection.'), + cfg.IntOpt(utils.U4P_FAILOVER_TIMEOUT, + default=20.0, + help='How long to wait for the server to send data before ' + 'giving up.'), + cfg.IntOpt(utils.U4P_FAILOVER_RETRIES, + default=3, + help='The maximum number of retries each connection should ' + 'attempt. Note, this applies only to failed DNS lookups, ' + 'socket connections and connection timeouts, never to ' + 'requests where data has made it to the server.'), + cfg.IntOpt(utils.U4P_FAILOVER_BACKOFF_FACTOR, + default=1, + help='A backoff factor to apply between attempts after the ' + 'second try (most errors are resolved immediately by a ' + 'second try without a delay). Retries will sleep for: ' + '{backoff factor} * (2 ^ ({number of total retries} - 1)) ' + 'seconds.'), + cfg.BoolOpt(utils.U4P_FAILOVER_AUTOFAILBACK, + default=True, + help='If the driver should automatically failback to the ' + 'primary instance of Unisphere when a successful ' + 'connection is re-established.'), + cfg.MultiOpt(utils.U4P_FAILOVER_TARGETS, + item_type=types.Dict(), + help='Dictionary of Unisphere failover target info.')] CONF.register_opts(vmax_opts, group=configuration.SHARED_CONF_GROUP) @@ -134,6 +160,7 @@ class VMAXCommon(object): self.active_backend_id = active_backend_id self.failover = False self._get_replication_info() + self._get_u4p_failover_info() self._gather_info() self.version_dict = {} self.nextGen = False @@ -174,6 +201,84 @@ class VMAXCommon(object): {'emcConfigFileName': self.pool_info['config_file'], 'backendName': self.pool_info['backend_name']}) + def _get_u4p_failover_info(self): + """Gather Unisphere failover target information, if provided.""" + + key_dict = {'san_ip': 'RestServerIp', + 'san_api_port': 'RestServerPort', + 'san_login': 'RestUserName', + 'san_password': 'RestPassword', + 'driver_ssl_cert_verify': 'SSLVerify', + 'driver_ssl_cert_path': 'SSLPath'} + + if self.configuration.safe_get('u4p_failover_target'): + u4p_targets = self.configuration.safe_get('u4p_failover_target') + formatted_target_list = list() + for target in u4p_targets: + formatted_target = {key_dict[key]: value for key, value in + target.items()} + + try: + formatted_target['SSLVerify'] = formatted_target['SSLPath'] + del formatted_target['SSLPath'] + except KeyError: + if formatted_target['SSLVerify'] == 'False': + formatted_target['SSLVerify'] = False + else: + formatted_target['SSLVerify'] = True + + formatted_target_list.append(formatted_target) + + u4p_failover_config = dict() + u4p_failover_config['u4p_failover_targets'] = formatted_target_list + u4p_failover_config['u4p_failover_backoff_factor'] = ( + self.configuration.safe_get('u4p_failover_backoff_factor')) + u4p_failover_config['u4p_failover_retries'] = ( + self.configuration.safe_get('u4p_failover_retries')) + u4p_failover_config['u4p_failover_timeout'] = ( + self.configuration.safe_get('u4p_failover_timeout')) + u4p_failover_config['u4p_failover_autofailback'] = ( + self.configuration.safe_get('u4p_failover_autofailback')) + u4p_failover_config['u4p_primary'] = ( + self.get_attributes_from_cinder_config()) + + self.rest.set_u4p_failover_config(u4p_failover_config) + else: + LOG.warning("There has been no failover instances of Unisphere " + "configured for this instance of Cinder. If your " + "primary instance of Unisphere goes down then your " + "VMAX will be inaccessible until the Unisphere REST " + "API is responsive again.") + + def retest_primary_u4p(self): + """Retest connection to the primary instance of Unisphere.""" + primary_array_info = self.get_attributes_from_cinder_config() + temp_conn = rest.VMAXRest() + temp_conn.set_rest_credentials(primary_array_info) + LOG.debug( + "Running connection check to primary instance of Unisphere " + "at %(primary)s", { + 'primary': primary_array_info['RestServerIp']}) + sc, response = temp_conn.request(target_uri='/system/version', + method='GET', u4p_check=True, + request_object=None) + if sc and int(sc) == 200: + self._get_u4p_failover_info() + self.rest.set_rest_credentials(primary_array_info) + self.rest.u4p_in_failover = False + LOG.info("Connection to primary instance of Unisphere at " + "%(primary)s restored, available failover instances of " + "Unisphere reset to default.", { + 'primary': primary_array_info['RestServerIp']}) + else: + LOG.debug( + "Connection check to primary instance of Unisphere at " + "%(primary)s failed, maintaining session with backup " + "instance of Unisphere at %(bu_in_use)s", { + 'primary': primary_array_info['RestServerIp'], + 'bu_in_use': self.rest.base_uri}) + temp_conn.session.close() + def _get_initiator_check_flag(self): """Reads the configuration for initator_check flag. @@ -870,6 +975,8 @@ class VMAXCommon(object): def update_volume_stats(self): """Retrieve stats info.""" + if self.rest.u4p_in_failover and self.rest.u4p_failover_autofailback: + self.retest_primary_u4p() pools = [] # Dictionary to hold the arrays for which the SRP details # have already been queried. @@ -891,7 +998,6 @@ class VMAXCommon(object): self.rep_config, array_info) # Add both SLO & Workload name in the pool name # Only insert the array details in the dict once - self.rest.set_rest_credentials(array_info) if array_info['SerialNumber'] not in arrays: (location_info, total_capacity_gb, free_capacity_gb, provisioned_capacity_gb, @@ -1262,8 +1368,6 @@ class VMAXCommon(object): raise exception.VolumeBackendAPIException( message=exception_message) - self.rest.set_rest_credentials(array_info) - extra_specs = self._set_vmax_extra_specs(extra_specs, array_info) if qos_specs and qos_specs.get('consumer') != "front-end": extra_specs['qos'] = qos_specs.get('specs') diff --git a/cinder/volume/drivers/dell_emc/vmax/fc.py b/cinder/volume/drivers/dell_emc/vmax/fc.py index 54e5920c59e..ac971fb5e51 100644 --- a/cinder/volume/drivers/dell_emc/vmax/fc.py +++ b/cinder/volume/drivers/dell_emc/vmax/fc.py @@ -101,6 +101,8 @@ class VMAXFCDriver(san.SanDriver, driver.FibreChannelDriver): 3.3.0 - Fix for initiator retrieval and short hostname unmapping (bugs #1783855 #1783867) - Fix for HyperMax OS Upgrade Bug (bug #1790141) + - Support for failover to secondary Unisphere + (bp/vmax-unisphere-failover) """ VERSION = "3.3.0" diff --git a/cinder/volume/drivers/dell_emc/vmax/iscsi.py b/cinder/volume/drivers/dell_emc/vmax/iscsi.py index e07c2ff999d..c3f218cb0a9 100644 --- a/cinder/volume/drivers/dell_emc/vmax/iscsi.py +++ b/cinder/volume/drivers/dell_emc/vmax/iscsi.py @@ -106,6 +106,8 @@ class VMAXISCSIDriver(san.SanISCSIDriver): 3.3.0 - Fix for initiator retrieval and short hostname unmapping (bugs #1783855 #1783867) - Fix for HyperMax OS Upgrade Bug (bug #1790141) + - Support for failover to secondary Unisphere + (bp/vmax-unisphere-failover) """ VERSION = "3.3.0" diff --git a/cinder/volume/drivers/dell_emc/vmax/rest.py b/cinder/volume/drivers/dell_emc/vmax/rest.py index d3599289482..94d51d8a889 100644 --- a/cinder/volume/drivers/dell_emc/vmax/rest.py +++ b/cinder/volume/drivers/dell_emc/vmax/rest.py @@ -14,13 +14,17 @@ # under the License. import json +import sys +import time from oslo_log import log as logging from oslo_service import loopingcall from oslo_utils import units import requests import requests.auth +import requests.exceptions as r_exc import requests.packages.urllib3.exceptions as urllib_exp +import requests.packages.urllib3.util.retry as requests_retry import six from cinder import coordination @@ -47,6 +51,7 @@ STATUS_200 = 200 STATUS_201 = 201 STATUS_202 = 202 STATUS_204 = 204 +SERVER_ERROR_STATUS_CODES = [408, 501, 502, 503, 504] # Job constants INCOMPLETE_LIST = ['created', 'unscheduled', 'scheduled', 'running', 'validating', 'validated'] @@ -66,6 +71,16 @@ class VMAXRest(object): self.passwd = None self.verify = None self.cert = None + # Failover Unisphere configuration + self.primary_u4p = None + self.u4p_failover_enabled = False + self.u4p_failover_autofailback = True + self.u4p_failover_targets = list() + self.u4p_failover_retries = 3 + self.u4p_failover_timeout = 30 + self.u4p_failover_backoff_factor = 1 + self.u4p_in_failover = False + self.u4p_failover_lock = False def set_rest_credentials(self, array_info): """Given the array record set the rest server credentials. @@ -82,72 +97,198 @@ class VMAXRest(object): 'ip_port': ip_port}) self.session = self._establish_rest_session() + def set_u4p_failover_config(self, failover_info): + """Set the environment failover Unisphere targets and configuration.. + + :param failover_info: failover target record + :return: + """ + self.u4p_failover_enabled = True + self.primary_u4p = failover_info['u4p_primary'] + self.u4p_failover_targets = failover_info['u4p_failover_targets'] + + if failover_info['u4p_failover_retries']: + self.u4p_failover_retries = failover_info['u4p_failover_retries'] + if failover_info['u4p_failover_timeout']: + self.u4p_failover_timeout = failover_info['u4p_failover_timeout'] + if failover_info['u4p_failover_backoff_factor']: + self.u4p_failover_backoff_factor = failover_info[ + 'u4p_failover_backoff_factor'] + if failover_info['u4p_failover_autofailback']: + self.u4p_failover_autofailback = failover_info[ + 'u4p_failover_autofailback'] + def _establish_rest_session(self): """Establish the rest session. :returns: requests.session() -- session, the rest session """ + LOG.info("Establishing REST session with %(base_uri)s", + {'base_uri': self.base_uri}) + if self.session: + self.session.close() session = requests.session() session.headers = {'content-type': 'application/json', 'accept': 'application/json', 'Application-Type': 'openstack'} session.auth = requests.auth.HTTPBasicAuth(self.user, self.passwd) + if self.verify is not None: session.verify = self.verify + # SESSION FAILOVER CONFIGURATION + if self.u4p_failover_enabled: + timeout = self.u4p_failover_timeout + + class MyHTTPAdapter(requests.adapters.HTTPAdapter): + def send(self, *args, **kwargs): + kwargs['timeout'] = timeout + return super(MyHTTPAdapter, self).send(*args, **kwargs) + + retry = requests_retry.Retry( + total=self.u4p_failover_retries, + backoff_factor=self.u4p_failover_backoff_factor, + status_forcelist=SERVER_ERROR_STATUS_CODES) + adapter = MyHTTPAdapter(max_retries=retry) + session.mount('https://', adapter) + session.mount('http://', adapter) + return session - def request(self, target_uri, method, params=None, request_object=None): + def _handle_u4p_failover(self): + """Handle the failover process to secondary instance of Unisphere. + + :raises: VolumeBackendAPIException + """ + if self.u4p_failover_targets: + LOG.error("Unisphere failure at %(prim)s, switching to next " + "backup instance of Unisphere at %(sec)s", { + 'prim': self.base_uri, + 'sec': self.u4p_failover_targets[0][ + 'RestServerIp']}) + self.set_rest_credentials(self.u4p_failover_targets[0]) + self.u4p_failover_targets.pop(0) + if self.u4p_in_failover: + LOG.warning("VMAX driver still in u4p failover mode. A " + "periodic check will be made to see if primary " + "Unisphere comes back online for seamless " + "restoration.") + else: + LOG.warning("VMAX driver set to u4p failover mode. A periodic " + "check will be made to see if primary Unisphere " + "comes back online for seamless restoration.") + self.u4p_in_failover = True + else: + msg = _("A connection could not be established with the " + "primary instance of Unisphere or any of the " + "specified failover instances of Unisphere. Please " + "check your local environment setup and restart " + "Cinder Volume service to revert back to the primary " + "Unisphere instance.") + self.u4p_failover_lock = False + raise exception.VolumeBackendAPIException(data=msg) + + def request(self, target_uri, method, params=None, request_object=None, + u4p_check=False, retry=False): """Sends a request (GET, POST, PUT, DELETE) to the target api. :param target_uri: target uri (string) :param method: The method (GET, POST, PUT, or DELETE) :param params: Additional URL parameters :param request_object: request payload (dict) + :param u4p_check: if request is testing connection (boolean) + :param retry: if request is retry from prior failed request (boolean) :returns: server response object (dict) - :raises: VolumeBackendAPIException + :raises: VolumeBackendAPIException, Timeout, ConnectionError, + HTTPError, SSLError """ - message, status_code = None, None + while self.u4p_failover_lock and not retry: + LOG.warning("Unisphere failover lock in process, holding request " + "until lock is released when Unisphere connection " + "re-established.") + time.sleep(10) + + url, message, status_code, response = None, None, None, None if not self.session: self.session = self._establish_rest_session() - url = ("%(self.base_uri)s%(target_uri)s" % - {'self.base_uri': self.base_uri, - 'target_uri': target_uri}) + try: + url = ("%(self.base_uri)s%(target_uri)s" % { + 'self.base_uri': self.base_uri, + 'target_uri': target_uri}) + if request_object: response = self.session.request( method=method, url=url, data=json.dumps(request_object, sort_keys=True, indent=4)) elif params: - response = self.session.request(method=method, url=url, - params=params) + response = self.session.request( + method=method, url=url, params=params) else: - response = self.session.request(method=method, url=url) + response = self.session.request( + method=method, url=url) + status_code = response.status_code + if retry and status_code and status_code in [STATUS_200, + STATUS_201, + STATUS_202, + STATUS_204]: + self.u4p_failover_lock = False + try: message = response.json() except ValueError: LOG.debug("No response received from API. Status code " - "received is: %(status_code)s", - {'status_code': status_code}) + "received is: %(status_code)s", { + 'status_code': status_code}) message = None - LOG.debug("%(method)s request to %(url)s has returned with " - "a status code of: %(status_code)s.", - {'method': method, 'url': url, - 'status_code': status_code}) - except requests.Timeout: - LOG.error("The %(method)s request to URL %(url)s timed-out, " - "but may have been successful. Please check the array.", - {'method': method, 'url': url}) + LOG.debug("%(method)s request to %(url)s has returned with " + "a status code of: %(status_code)s.", { + 'method': method, 'url': url, + 'status_code': status_code}) + + except r_exc.SSLError as e: + msg = _("The connection to %(base_uri)s has encountered an " + "SSL error. Please check your SSL config or supplied " + "SSL cert in Cinder configuration. SSL Exception " + "message: %(e)s") + raise r_exc.SSLError(msg, {'base_uri': self.base_uri, 'e': e}) + + except (r_exc.Timeout, r_exc.ConnectionError, + r_exc.HTTPError) as e: + if self.u4p_failover_enabled or u4p_check: + if not u4p_check: + # Failover process + LOG.warning("Running failover to backup instance " + "of Unisphere") + self.u4p_failover_lock = True + self._handle_u4p_failover() + # Failover complete, re-run failed operation + LOG.info("Running request again to backup instance of " + "Unisphere") + status_code, message = self.request( + target_uri, method, params, request_object, retry=True) + elif not self.u4p_failover_enabled: + exc_class, __, __ = sys.exc_info() + msg = _("The %(method)s to Unisphere server %(base)s has " + "experienced a %(error)s error. Please check your " + "Unisphere server connection/availability. " + "Exception message: %(exc_msg)s") + raise exc_class(msg, {'method': method, + 'base': self.base_uri, + 'error': e.__class__.__name__, + 'exc_msg': e}) + except Exception as e: - exception_message = (_("The %(method)s request to URL %(url)s " - "failed with exception %(e)s") - % {'method': method, 'url': url, - 'e': six.text_type(e)}) - LOG.exception(exception_message) - raise exception.VolumeBackendAPIException(data=exception_message) + msg = _("The %(method)s request to URL %(url)s failed with " + "exception %(e)s") + LOG.exception(msg, {'method': method, 'url': url, + 'e': six.text_type(e)}) + raise exception.VolumeBackendAPIException( + data=(msg, {'method': method, 'url': url, + 'e': six.text_type(e)})) return status_code, message diff --git a/cinder/volume/drivers/dell_emc/vmax/utils.py b/cinder/volume/drivers/dell_emc/vmax/utils.py index e42b773f29c..1613721a7bc 100644 --- a/cinder/volume/drivers/dell_emc/vmax/utils.py +++ b/cinder/volume/drivers/dell_emc/vmax/utils.py @@ -96,6 +96,11 @@ VMAX_SRP = 'vmax_srp' VMAX_SERVICE_LEVEL = 'vmax_service_level' VMAX_PORT_GROUPS = 'vmax_port_groups' VMAX_SNAPVX_UNLINK_LIMIT = 'vmax_snapvx_unlink_limit' +U4P_FAILOVER_TIMEOUT = 'u4p_failover_timeout' +U4P_FAILOVER_RETRIES = 'u4p_failover_retries' +U4P_FAILOVER_BACKOFF_FACTOR = 'u4p_failover_backoff_factor' +U4P_FAILOVER_AUTOFAILBACK = 'u4p_failover_autofailback' +U4P_FAILOVER_TARGETS = 'u4p_failover_target' class VMAXUtils(object): diff --git a/releasenotes/notes/vmax-failover-unisphere-2de78d1f76b5f836.yaml b/releasenotes/notes/vmax-failover-unisphere-2de78d1f76b5f836.yaml new file mode 100644 index 00000000000..ba487374f9e --- /dev/null +++ b/releasenotes/notes/vmax-failover-unisphere-2de78d1f76b5f836.yaml @@ -0,0 +1,4 @@ +--- +features: + - Dell EMC VMAX driver has added support for failover to second + instance of Unisphere.