EMC VMAX - Operations and timeout issues

If a volume operation (e.g. create, delete, expand) were to
time out without fully completing, we were not catching the
error with the appropriate rc (return code) or errordesc
(error description). The new method performs an additional
check to correctly capture the status of the job, i.e. whether
it failed to complete in the allocated time.

Change-Id: I0b74f9941d722083b70c12bbebeb269d315c02b3
Closes-Bug: #1538214
Author: Helen Walsh
Date: 2016-01-26 22:26:55 +00:00
parent 092618aab8
commit ae8bb95939
4 changed files with 107 additions and 49 deletions
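With this change, wait_for_job_complete() returns an (rc, errordesc) tuple instead of None, and the updated tests mock it to return (-1, 'error') and expect delete_volume to raise VolumeBackendAPIException. A minimal caller-side sketch of that pattern follows; check_job_outcome() is a hypothetical helper for illustration only (the real checks live in driver code not shown in this diff):

    from cinder import exception


    def check_job_outcome(rc, errordesc, operation):
        """Hypothetical helper showing how the new return values can be used.

        wait_for_job_complete() now reports whether the job completed, timed
        out, or ended in a bad state via (rc, errordesc); a non-zero rc is
        turned into a backend exception carrying the error description.
        """
        if rc != 0:
            message = ("%(op)s failed. rc: %(rc)d, error: %(err)s."
                       % {'op': operation, 'rc': rc, 'err': errordesc})
            raise exception.VolumeBackendAPIException(data=message)


    # Illustrative usage, mirroring what the new tests simulate:
    # rc, errordesc = utils.wait_for_job_complete(conn, job, extraSpecs)
    # check_job_outcome(rc, errordesc, 'Delete Volume')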


@@ -1172,7 +1172,8 @@ class FakeEcomConnection(object):
else:
jobinstance['JobState'] = 7
jobinstance['ErrorCode'] = 0
jobinstance['ErrorDescription'] = ''
jobinstance['ErrorDescription'] = None
jobinstance['OperationalStatus'] = (2, 17)
return jobinstance
def _getinstance_policycapabilities(self, policycapabilitypath):
@@ -2316,8 +2317,9 @@ class EMCVMAXISCSIDriverNoFastTestCase(test.TestCase):
self.driver.utils._is_job_finished = mock.Mock(
return_value=True)
rc = self.driver.utils._wait_for_job_complete(conn, myjob)
self.assertIsNone(rc)
rc, errordesc = self.driver.utils.wait_for_job_complete(conn, myjob)
self.assertEqual(0, rc)
self.assertIsNone(errordesc)
self.driver.utils._is_job_finished.assert_called_once_with(
conn, myjob)
self.assertTrue(self.driver.utils._is_job_finished.return_value)
@@ -2326,13 +2328,31 @@ class EMCVMAXISCSIDriverNoFastTestCase(test.TestCase):
# Save the original state and restore it after this test
loopingcall_orig = loopingcall.FixedIntervalLoopingCall
loopingcall.FixedIntervalLoopingCall = mock.Mock()
rc = self.driver.utils._wait_for_job_complete(conn, myjob)
self.assertIsNone(rc)
rc, errordesc = self.driver.utils.wait_for_job_complete(conn, myjob)
self.assertEqual(0, rc)
self.assertIsNone(errordesc)
loopingcall.FixedIntervalLoopingCall.assert_called_once_with(
mock.ANY)
loopingcall.FixedIntervalLoopingCall.reset_mock()
loopingcall.FixedIntervalLoopingCall = loopingcall_orig
def test_wait_for_job_complete_bad_job_state(self):
myjob = SE_ConcreteJob()
myjob.classname = 'SE_ConcreteJob'
myjob['InstanceID'] = '9999'
myjob['status'] = 'success'
myjob['type'] = 'type'
myjob['CreationClassName'] = 'SE_ConcreteJob'
myjob['Job'] = myjob
conn = self.fake_ecom_connection()
self.driver.utils._is_job_finished = mock.Mock(
return_value=True)
self.driver.utils._verify_job_state = mock.Mock(
return_value=(-1, 'Job finished with an error'))
rc, errordesc = self.driver.utils.wait_for_job_complete(conn, myjob)
self.assertEqual(-1, rc)
self.assertEqual('Job finished with an error', errordesc)
def test_wait_for_sync(self):
mysync = 'fakesync'
conn = self.fake_ecom_connection()
@@ -3068,6 +3088,10 @@ class EMCVMAXISCSIDriverNoFastTestCase(test.TestCase):
self.driver.delete_volume(notfound_delete_vol)
@mock.patch.object(
emc_vmax_utils.EMCVMAXUtils,
'wait_for_job_complete',
return_value=(-1, 'error'))
@mock.patch.object(
emc_vmax_common.EMCVMAXCommon,
'_get_pool_and_storage_system',
@@ -3077,7 +3101,7 @@ class EMCVMAXISCSIDriverNoFastTestCase(test.TestCase):
'get_volume_type_extra_specs',
return_value={'volume_backend_name': 'ISCSINoFAST'})
def test_delete_volume_failed(
self, _mock_volume_type, mock_storage_system):
self, _mock_volume_type, mock_storage_system, mock_wait):
self.driver.create_volume(self.data.failed_delete_vol)
self.assertRaises(exception.VolumeBackendAPIException,
self.driver.delete_volume,
@@ -4039,6 +4063,10 @@ class EMCVMAXISCSIDriverFastTestCase(test.TestCase):
notfound_delete_vol['provider_location'] = None
self.driver.delete_volume(notfound_delete_vol)
@mock.patch.object(
emc_vmax_utils.EMCVMAXUtils,
'wait_for_job_complete',
return_value=(-1, 'error'))
@mock.patch.object(
emc_vmax_fast.EMCVMAXFast,
'get_pool_associated_to_policy',
@@ -4057,7 +4085,7 @@ class EMCVMAXISCSIDriverFastTestCase(test.TestCase):
return_value={'volume_backend_name': 'ISCSIFAST'})
def test_delete_volume_fast_failed(
self, _mock_volume_type, _mock_storage_group,
mock_storage_system, mock_policy_pool):
mock_storage_system, mock_policy_pool, mock_wait):
self.driver.create_volume(self.data.failed_delete_vol)
self.assertRaises(exception.VolumeBackendAPIException,
self.driver.delete_volume,
@@ -4642,6 +4670,10 @@ class EMCVMAXFCDriverNoFastTestCase(test.TestCase):
notfound_delete_vol['provider_location'] = None
self.driver.delete_volume(notfound_delete_vol)
@mock.patch.object(
emc_vmax_utils.EMCVMAXUtils,
'wait_for_job_complete',
return_value=(-1, 'error'))
@mock.patch.object(
emc_vmax_common.EMCVMAXCommon,
'_get_pool_and_storage_system',
@@ -4651,7 +4683,7 @@ class EMCVMAXFCDriverNoFastTestCase(test.TestCase):
'get_volume_type_extra_specs',
return_value={'volume_backend_name': 'FCNoFAST'})
def test_delete_volume_failed(
self, _mock_volume_type, mock_storage_system):
self, _mock_volume_type, mock_storage_system, mock_wait):
self.driver.create_volume(self.data.failed_delete_vol)
self.assertRaises(exception.VolumeBackendAPIException,
self.driver.delete_volume,
@@ -5228,6 +5260,10 @@ class EMCVMAXFCDriverFastTestCase(test.TestCase):
self.driver.delete_volume(notfound_delete_vol)
@mock.patch.object(
emc_vmax_utils.EMCVMAXUtils,
'wait_for_job_complete',
return_value=(-1, 'error'))
@mock.patch.object(
emc_vmax_fast.EMCVMAXFast,
'get_pool_associated_to_policy',
@@ -5246,7 +5282,7 @@ class EMCVMAXFCDriverFastTestCase(test.TestCase):
return_value={'volume_backend_name': 'FCFAST'})
def test_delete_volume_fast_failed(
self, _mock_volume_type, mock_wrapper,
mock_storage_system, mock_pool_policy):
mock_storage_system, mock_pool_policy, mock_wait):
self.driver.create_volume(self.data.failed_delete_vol)
self.assertRaises(exception.VolumeBackendAPIException,
self.driver.delete_volume,


@@ -61,6 +61,7 @@ class EMCVMAXFCDriver(driver.FibreChannelDriver):
- Getting iscsi ip from port in existing masking view
- Replacement of EMCGetTargetEndpoints api (bug #1512791)
- VMAX3 snapvx improvements (bug #1522821)
- Operations and timeout issues (bug #1538214)
"""
VERSION = "2.3.0"


@@ -67,6 +67,7 @@ class EMCVMAXISCSIDriver(driver.ISCSIDriver):
- Getting iscsi ip from port in existing masking view
- Replacement of EMCGetTargetEndpoints api (bug #1512791)
- VMAX3 snapvx improvements (bug #1522821)
- Operations and timeout issues (bug #1538214)
"""
VERSION = "2.3.0"


@@ -283,67 +283,50 @@ class EMCVMAXUtils(object):
:param conn: connection to the ecom server
:param job: the job dict
:param extraSpecs: the extraSpecs dict. Defaults to None
:returns: int -- the return code
:returns: errorDesc -- the error description string
"""
jobInstanceName = job['Job']
if extraSpecs and (INTERVAL in extraSpecs or RETRIES in extraSpecs):
self._wait_for_job_complete(conn, job, extraSpecs)
else:
self._wait_for_job_complete(conn, job)
jobinstance = conn.GetInstance(jobInstanceName,
LocalOnly=False)
rc = jobinstance['ErrorCode']
errorDesc = jobinstance['ErrorDescription']
LOG.debug("Return code is: %(rc)lu. "
"Error Description is: %(errorDesc)s.",
{'rc': rc,
'errorDesc': errorDesc})
return rc, errorDesc
def _wait_for_job_complete(self, conn, job, extraSpecs=None):
"""Given the job wait for it to complete.
:param conn: connection to the ecom server
:param job: the job dict
:param extraSpecs: the extraSpecs dict. Defaults to None
:raises: loopingcall.LoopingCallDone
:raises: VolumeBackendAPIException
"""
def _wait_for_job_complete():
# Called at an interval until the job is finished.
maxJobRetries = self._get_max_job_retries(extraSpecs)
retries = kwargs['retries']
wait_for_job_called = kwargs['wait_for_job_called']
if self._is_job_finished(conn, job):
raise loopingcall.LoopingCallDone()
if retries > maxJobRetries:
LOG.error(_LE("_wait_for_job_complete "
"failed after %(retries)d "
"tries."),
{'retries': retries})
raise loopingcall.LoopingCallDone()
try:
kwargs['retries'] = retries + 1
if not wait_for_job_called:
if not kwargs['wait_for_job_called']:
if self._is_job_finished(conn, job):
kwargs['rc'], kwargs['errordesc'] = (
self._verify_job_state(conn, job))
kwargs['wait_for_job_called'] = True
except Exception:
exceptionMessage = (_("Issue encountered waiting for job."))
LOG.exception(exceptionMessage)
raise exception.VolumeBackendAPIException(exceptionMessage)
if retries > maxJobRetries:
kwargs['rc'], kwargs['errordesc'] = (
self._verify_job_state(conn, job))
LOG.error(_LE("_wait_for_job_complete "
"failed after %(retries)d "
"tries."),
{'retries': retries})
raise loopingcall.LoopingCallDone()
if kwargs['wait_for_job_called']:
raise loopingcall.LoopingCallDone()
maxJobRetries = self._get_max_job_retries(extraSpecs)
kwargs = {'retries': 0,
'wait_for_job_called': False}
'wait_for_job_called': False,
'rc': 0,
'errordesc': None}
intervalInSecs = self._get_interval_in_secs(extraSpecs)
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_job_complete)
timer.start(interval=intervalInSecs).wait()
LOG.debug("Return code is: %(rc)lu. "
"Error Description is: %(errordesc)s.",
{'rc': kwargs['rc'],
'errordesc': kwargs['errordesc']})
return kwargs['rc'], kwargs['errordesc']
def _get_max_job_retries(self, extraSpecs):
"""Get max job retries either default or user defined
@@ -393,6 +376,43 @@ class EMCVMAXUtils(object):
else:
return True
def _verify_job_state(self, conn, job):
"""Check if the job is finished.
:param conn: connection to the ecom server
:param job: the job dict
:returns: boolean -- True if finished; False if not finished;
"""
jobstatedict = {2: 'New',
3: 'Starting',
4: 'Running',
5: 'Suspended',
6: 'Shutting Down',
7: 'Completed',
8: 'Terminated',
9: 'Killed',
10: 'Exception',
11: 'Service',
32767: 'Queue Pending',
32768: 'DMTF Reserved',
65535: 'Vendor Reserved'}
jobInstanceName = job['Job']
jobinstance = conn.GetInstance(jobInstanceName,
LocalOnly=False)
operationalstatus = jobinstance['OperationalStatus']
if not operationalstatus:
jobstate = jobinstance['JobState']
errordescription = (_(
"The job has not completed and is in a %(state)s "
"state.")
% {'state': jobstatedict[int(jobstate)]})
LOG.error(errordescription)
errorcode = -1
else:
errordescription = jobinstance['ErrorDescription']
errorcode = jobinstance['ErrorCode']
return errorcode, errordescription
def wait_for_sync(self, conn, syncName, extraSpecs=None):
"""Given the sync name wait for it to fully synchronize.