PowerMax Driver - U4P failover lock not released on exception

During U4P failover, if the initial operation that triggered the
failover hits an exception it can leave the failover lock in place.
Any operations waiting for lock to clear will continue to loop in place.
Added additional lock releases in exception catches and also a maximum
time that the holding operations should wait.

Change-Id: I43319854df9597df791a83cb4c598db0db3caec7
Closes-bug: 1875640
(cherry picked from commit 4351ace84f)
This commit is contained in:
odonos12 2020-04-28 13:33:20 +01:00 committed by Helen Walsh
parent f6686741e7
commit 13e45234bf
4 changed files with 32 additions and 4 deletions

View File

@ -86,6 +86,19 @@ class PowerMaxRestTest(test.TestCase):
self.rest.request('/fake_uri', 'GET')
mock_fail.assert_called_once()
@mock.patch.object(time, 'sleep')
def test_rest_request_failover_escape(self, mck_sleep):
self.rest.u4p_failover_lock = True
response = tpfo.FakeResponse(200, 'Success')
with mock.patch.object(self.rest, '_handle_u4p_failover')as mock_fail:
with mock.patch.object(self.rest.session, 'request',
side_effect=[requests.ConnectionError,
response]):
self.rest.u4p_failover_enabled = True
self.rest.request('/fake_uri', 'GET')
mock_fail.assert_called_once()
self.rest.u4p_failover_lock = False
def test_wait_for_job_complete(self):
rc, job, status, task = self.rest.wait_for_job_complete(
{'status': 'created', 'jobId': '12345'}, self.data.extra_specs)

View File

@ -123,9 +123,10 @@ class PowerMaxFCDriver(san.SanDriver, driver.FibreChannelDriver):
- Support for multiple replication devices
- Pools bug fix allowing 'None' variants (bug #1873253)
4.2.1 - Concurrent live migrations failure (bug #1875478)
4.2.2 - U4P failover lock not released on exception (#1875640)
"""
VERSION = "4.2.1"
VERSION = "4.2.2"
# ThirdPartySystems wiki
CI_WIKI_NAME = "EMC_VMAX_CI"

View File

@ -128,9 +128,10 @@ class PowerMaxISCSIDriver(san.SanISCSIDriver):
- Support for multiple replication devices
- Pools bug fix allowing 'None' variants (bug #1873253)
4.2.1 - Concurrent live migrations failure (bug #1875478)
4.2.2 - U4P failover lock not released on exception (#1875640)
"""
VERSION = "4.2.1"
VERSION = "4.2.2"
# ThirdPartySystems wiki
CI_WIKI_NAME = "EMC_VMAX_CI"

View File

@ -39,6 +39,7 @@ U4V_VERSION = '91'
MIN_U4P_VERSION = '9.1.0.14'
UCODE_5978 = '5978'
retry_exc_tuple = (exception.VolumeBackendAPIException,)
u4p_failover_max_wait = 120
# HTTP constants
GET = 'GET'
POST = 'POST'
@ -199,11 +200,17 @@ class PowerMaxRest(object):
:raises: VolumeBackendAPIException, Timeout, ConnectionError,
HTTPError, SSLError
"""
while self.u4p_failover_lock and not retry:
waiting_time = 0
while self.u4p_failover_lock and not retry and (
waiting_time < u4p_failover_max_wait):
LOG.warning("Unisphere failover lock in process, holding request "
"until lock is released when Unisphere connection "
"re-established.")
time.sleep(10)
sleeptime = 10
time.sleep(sleeptime)
waiting_time += sleeptime
if waiting_time >= u4p_failover_max_wait:
self.u4p_failover_lock = False
url, message, status_code, response = None, None, None, None
if not self.session:
@ -240,6 +247,8 @@ class PowerMaxRest(object):
"received is: %(status_code)s", {
'status_code': status_code})
message = None
if retry:
self.u4p_failover_lock = False
LOG.debug("%(method)s request to %(url)s has returned with "
"a status code of: %(status_code)s.", {
@ -247,6 +256,8 @@ class PowerMaxRest(object):
'status_code': status_code})
except r_exc.SSLError as e:
if retry:
self.u4p_failover_lock = False
msg = _("The connection to %(base_uri)s has encountered an "
"SSL error. Please check your SSL config or supplied "
"SSL cert in Cinder configuration. SSL Exception "
@ -279,6 +290,8 @@ class PowerMaxRest(object):
'exc_msg': e})
except Exception as e:
if retry:
self.u4p_failover_lock = False
msg = _("The %(method)s request to URL %(url)s failed with "
"exception %(e)s")
LOG.error(msg, {'method': method, 'url': url,