Merge "NetApp SolidFire: Fix duplicate volume when API response is lost" into stable/ussuri

This commit is contained in:
Zuul 2021-03-06 15:01:21 +00:00 committed by Gerrit Code Review
commit 17d3ac3f22
3 changed files with 86 additions and 8 deletions

View File

@ -349,11 +349,14 @@ class SolidFireVolumeTestCase(test.TestCase):
'attributes': {'uuid': f_uuid[1]}, 'attributes': {'uuid': f_uuid[1]},
'qos': None, 'qos': None,
'iqn': test_name}]}} 'iqn': test_name}]}}
if params and params['startVolumeID']: if params and params.get('startVolumeID', None):
volumes = result['result']['volumes'] volumes = result['result']['volumes']
selected_volumes = [v for v in volumes if v.get('volumeID') selected_volumes = [v for v in volumes if v.get('volumeID') !=
!= params['startVolumeID']] params['startVolumeID']]
result['result']['volumes'] = selected_volumes result['result']['volumes'] = selected_volumes
else:
result = {'result': {'volumes': []}}
return result return result
elif method == 'DeleteSnapshot': elif method == 'DeleteSnapshot':
return {'result': {}} return {'result': {}}

View File

@ -88,6 +88,7 @@ sf_opts = [
'\'usedSpace\', the driver will report correct ' '\'usedSpace\', the driver will report correct '
'values as expected by Cinder ' 'values as expected by Cinder '
'thin provisioning.'), 'thin provisioning.'),
cfg.IntOpt('sf_api_request_timeout', cfg.IntOpt('sf_api_request_timeout',
default=30, default=30,
min=30, min=30,
@ -98,7 +99,13 @@ sf_opts = [
min=60, min=60,
help='Sets time in seconds to wait for a clone of a volume or ' help='Sets time in seconds to wait for a clone of a volume or '
'snapshot to complete.' 'snapshot to complete.'
)] ),
cfg.IntOpt('sf_volume_create_timeout',
default=60,
min=30,
help='Sets time in seconds to wait for a create volume '
'operation to complete.')]
CONF = cfg.CONF CONF = cfg.CONF
CONF.register_opts(sf_opts, group=configuration.SHARED_CONF_GROUP) CONF.register_opts(sf_opts, group=configuration.SHARED_CONF_GROUP)
@ -241,9 +248,11 @@ class SolidFireDriver(san.SanISCSIDriver):
- Implement Active/Active replication support - Implement Active/Active replication support
2.2.1 - Fix bug #1891914 fix error on cluster workload rebalancing 2.2.1 - Fix bug #1891914 fix error on cluster workload rebalancing
by adding xNotPrimary to the retryable exception list by adding xNotPrimary to the retryable exception list
2.2.2 - Fix bug #1896112 SolidFire Driver creates duplicate volume
when API response is lost
""" """
VERSION = '2.2.1' VERSION = '2.2.2'
SUPPORTS_ACTIVE_ACTIVE = True SUPPORTS_ACTIVE_ACTIVE = True
@ -939,10 +948,62 @@ class SolidFireDriver(san.SanISCSIDriver):
params['attributes'] = attributes params['attributes'] = attributes
return self._issue_api_request('ModifyVolume', params) return self._issue_api_request('ModifyVolume', params)
def _list_volumes_by_name(self, sf_volume_name):
params = {'volumeName': sf_volume_name}
return self._issue_api_request(
'ListVolumes', params, version='8.0')['result']['volumes']
def _wait_volume_is_active(self, sf_volume_name):
def _wait():
volumes = self._list_volumes_by_name(sf_volume_name)
if volumes:
LOG.debug("Found Volume [%s] in SolidFire backend. "
"Current status is [%s].",
sf_volume_name, volumes[0]['status'])
if volumes[0]['status'] == 'active':
raise loopingcall.LoopingCallDone(volumes[0])
try:
timer = loopingcall.FixedIntervalWithTimeoutLoopingCall(
_wait)
sf_volume = (timer.start(
interval=1,
timeout=self.configuration.sf_volume_create_timeout).wait())
return sf_volume
except loopingcall.LoopingCallTimeOut:
msg = ("Timeout while waiting volume [%s] "
"to be in active state." % sf_volume_name)
LOG.error(msg)
raise SolidFireAPIException(msg)
def _do_volume_create(self, sf_account, params, endpoint=None): def _do_volume_create(self, sf_account, params, endpoint=None):
params['accountID'] = sf_account['accountID']
sf_volid = self._issue_api_request( sf_volume_name = params['name']
'CreateVolume', params, endpoint=endpoint)['result']['volumeID'] volumes_found = self._list_volumes_by_name(sf_volume_name)
if volumes_found:
msg = ('Volume name [%s] already exists '
'in SolidFire backend.') % sf_volume_name
LOG.error(msg)
raise DuplicateSfVolumeNames(message=msg)
sf_volid = None
try:
params['accountID'] = sf_account['accountID']
response = self._issue_api_request(
'CreateVolume', params, endpoint=endpoint)
sf_volid = response['result']['volumeID']
except requests.exceptions.ReadTimeout:
LOG.debug("Read Timeout exception caught while creating "
"volume [%s].", sf_volume_name)
# Check if volume was created for the given name,
# in case the backend has processed the request but failed
# to deliver the response before api request timeout.
volume_created = self._wait_volume_is_active(sf_volume_name)
sf_volid = volume_created['volumeID']
return self._get_model_info(sf_account, sf_volid, endpoint=endpoint) return self._get_model_info(sf_account, sf_volid, endpoint=endpoint)
def _do_snapshot_create(self, params): def _do_snapshot_create(self, params):

View File

@ -0,0 +1,14 @@
---
fixes:
- |
NetApp SolidFire driver `Bug #1896112
<https://bugs.launchpad.net/cinder/+bug/1896112>`_:
Fixes an issue that may duplicate volumes during creation, in case
the SolidFire backend successfully processes a request and creates
the volume, but fails to deliver the result back to the driver (the
response is lost). When this scenario occurs, the SolidFire driver
will retry the operation, which previously resulted in the creation
of a duplicate volume. This fix adds the ``sf_volume_create_timeout``
configuration option (default value: 60 seconds) which specifies an
additional length of time that the driver will wait for the volume to
become active on the backend before raising an exception.