diff --git a/cinder/tests/unit/volume/drivers/solidfire/test_solidfire.py b/cinder/tests/unit/volume/drivers/solidfire/test_solidfire.py index 11d599c2dad..bfdf74e3cdc 100644 --- a/cinder/tests/unit/volume/drivers/solidfire/test_solidfire.py +++ b/cinder/tests/unit/volume/drivers/solidfire/test_solidfire.py @@ -349,11 +349,14 @@ class SolidFireVolumeTestCase(test.TestCase): 'attributes': {'uuid': f_uuid[1]}, 'qos': None, 'iqn': test_name}]}} - if params and params['startVolumeID']: + if params and params.get('startVolumeID', None): volumes = result['result']['volumes'] - selected_volumes = [v for v in volumes if v.get('volumeID') - != params['startVolumeID']] + selected_volumes = [v for v in volumes if v.get('volumeID') != + params['startVolumeID']] result['result']['volumes'] = selected_volumes + else: + result = {'result': {'volumes': []}} + return result elif method == 'DeleteSnapshot': return {'result': {}} diff --git a/cinder/volume/drivers/solidfire.py b/cinder/volume/drivers/solidfire.py index a8eb5349c55..c84258a6bfc 100644 --- a/cinder/volume/drivers/solidfire.py +++ b/cinder/volume/drivers/solidfire.py @@ -88,6 +88,7 @@ sf_opts = [ '\'usedSpace\', the driver will report correct ' 'values as expected by Cinder ' 'thin provisioning.'), + cfg.IntOpt('sf_api_request_timeout', default=30, min=30, @@ -98,7 +99,13 @@ sf_opts = [ min=60, help='Sets time in seconds to wait for a clone of a volume or ' 'snapshot to complete.' 
- )] + ), + + cfg.IntOpt('sf_volume_create_timeout', + default=60, + min=30, + help='Sets time in seconds to wait for a create volume ' + 'operation to complete.')] CONF = cfg.CONF CONF.register_opts(sf_opts, group=configuration.SHARED_CONF_GROUP) @@ -241,9 +248,11 @@ class SolidFireDriver(san.SanISCSIDriver): - Implement Active/Active replication support 2.2.1 - Fix bug #1891914 fix error on cluster workload rebalancing by adding xNotPrimary to the retryable exception list + 2.2.2 - Fix bug #1896112 SolidFire Driver creates duplicate volume + when API response is lost """ - VERSION = '2.2.1' + VERSION = '2.2.2' SUPPORTS_ACTIVE_ACTIVE = True @@ -939,10 +948,62 @@ class SolidFireDriver(san.SanISCSIDriver): params['attributes'] = attributes return self._issue_api_request('ModifyVolume', params) + def _list_volumes_by_name(self, sf_volume_name): + params = {'volumeName': sf_volume_name} + return self._issue_api_request( + 'ListVolumes', params, version='8.0')['result']['volumes'] + + def _wait_volume_is_active(self, sf_volume_name): + + def _wait(): + volumes = self._list_volumes_by_name(sf_volume_name) + if volumes: + LOG.debug("Found Volume [%s] in SolidFire backend. " + "Current status is [%s].", + sf_volume_name, volumes[0]['status']) + if volumes[0]['status'] == 'active': + raise loopingcall.LoopingCallDone(volumes[0]) + + try: + timer = loopingcall.FixedIntervalWithTimeoutLoopingCall( + _wait) + sf_volume = (timer.start( + interval=1, + timeout=self.configuration.sf_volume_create_timeout).wait()) + + return sf_volume + except loopingcall.LoopingCallTimeOut: + msg = ("Timeout while waiting volume [%s] " + "to be in active state." 
% sf_volume_name) + LOG.error(msg) + raise SolidFireAPIException(msg) + def _do_volume_create(self, sf_account, params, endpoint=None): - params['accountID'] = sf_account['accountID'] - sf_volid = self._issue_api_request( - 'CreateVolume', params, endpoint=endpoint)['result']['volumeID'] + + sf_volume_name = params['name'] + volumes_found = self._list_volumes_by_name(sf_volume_name) + if volumes_found: + msg = ('Volume name [%s] already exists ' + 'in SolidFire backend.') % sf_volume_name + LOG.error(msg) + raise DuplicateSfVolumeNames(message=msg) + + sf_volid = None + try: + params['accountID'] = sf_account['accountID'] + response = self._issue_api_request( + 'CreateVolume', params, endpoint=endpoint) + sf_volid = response['result']['volumeID'] + + except requests.exceptions.ReadTimeout: + LOG.debug("Read Timeout exception caught while creating " + "volume [%s].", sf_volume_name) + # Check if volume was created for the given name, + # in case the backend has processed the request but failed + # to deliver the response before api request timeout. + volume_created = self._wait_volume_is_active(sf_volume_name) + sf_volid = volume_created['volumeID'] + return self._get_model_info(sf_account, sf_volid, endpoint=endpoint) def _do_snapshot_create(self, params): diff --git a/releasenotes/notes/sf-fix-duplicate-volume-request-lost-adefacda1298dc62.yaml b/releasenotes/notes/sf-fix-duplicate-volume-request-lost-adefacda1298dc62.yaml new file mode 100644 index 00000000000..775bcd425f4 --- /dev/null +++ b/releasenotes/notes/sf-fix-duplicate-volume-request-lost-adefacda1298dc62.yaml @@ -0,0 +1,14 @@ +--- +fixes: + - | + NetApp SolidFire driver `Bug #1896112 + <https://bugs.launchpad.net/cinder/+bug/1896112>`_: + Fixes an issue that may duplicate volumes during creation, in case + the SolidFire backend successfully processes a request and creates + the volume, but fails to deliver the result back to the driver (the + response is lost). 
When this scenario occurs, the SolidFire driver + will retry the operation, which previously resulted in the creation + of a duplicate volume. This fix adds the ``sf_volume_create_timeout`` + configuration option (default value: 60 seconds), which specifies an + additional length of time that the driver will wait for the volume to + become active on the backend before raising an exception.