NetApp SolidFire: Fix clone and request timeout issues
Users are experiencing timeout issues in certain environments, mostly when volumes are too big (ie. multi-terabyte volumes), due to poor network performance or upgrade issues that revolve around the SolidFire cluster. A viable solution is to make driver timeout values configurable in cinder.conf, so users can set these timeouts according to their needs. This patch adds two timeout settings to the SolidFire driver (for cloning operation and globally to all api requests), to allow users to set the appropriate timeouts for their environment. Closes-Bug: #1898587 Change-Id: Ie330c76a5db0ea76d4fed5a6ae7b8736dadc8591 (cherry picked from commitc607a82a99
) (cherry picked from commit3b2e7d662b
) (cherry picked from commitb790e81e74
)
This commit is contained in:
parent
88133fae47
commit
ddb88caad8
|
@ -180,8 +180,8 @@ class SolidFireVolumeTestCase(test.TestCase):
|
|||
'volumeID': 6}]
|
||||
|
||||
def fake_issue_api_request(self, method, params, version='1.0',
|
||||
endpoint=None):
|
||||
if method is 'GetClusterCapacity':
|
||||
endpoint=None, timeout=None):
|
||||
if method == 'GetClusterCapacity':
|
||||
data = {}
|
||||
if version == '1.0':
|
||||
data = {'result': {'clusterCapacity': {
|
||||
|
@ -597,6 +597,12 @@ class SolidFireVolumeTestCase(test.TestCase):
|
|||
'volume_type_id': None,
|
||||
'created_at': timeutils.utcnow()}
|
||||
|
||||
fake_model_info = {
|
||||
'provider_id': '%s %s cluster-id-01' % (
|
||||
self.fake_sfvol['volumeID'],
|
||||
self.fake_sfaccount['accountID'])
|
||||
}
|
||||
|
||||
ctx = context.get_admin_context()
|
||||
testvol = fake_volume.fake_volume_obj(ctx, **updates_vol_a)
|
||||
testvol_b = fake_volume.fake_volume_obj(ctx, **updates_vol_b)
|
||||
|
@ -616,7 +622,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
|||
return_value=[]), \
|
||||
mock.patch.object(sfv,
|
||||
'_get_model_info',
|
||||
return_value={}):
|
||||
return_value=fake_model_info):
|
||||
sfv.create_cloned_volume(testvol_b, testvol)
|
||||
|
||||
def test_initialize_connector_with_blocksizes(self):
|
||||
|
@ -2948,6 +2954,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
|||
'mvip': self.mvip,
|
||||
'svip': self.svip}
|
||||
|
||||
self.configuration.sf_volume_clone_timeout = 1
|
||||
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
|
||||
sfv.replication_enabled = False
|
||||
|
||||
|
@ -2992,7 +2999,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
|||
mock_issue_api_request.assert_has_calls(calls)
|
||||
mock_test_set_cluster_pairs.assert_not_called()
|
||||
mock_update_attributes.assert_not_called()
|
||||
mock_get_model_info.assert_called_once()
|
||||
mock_get_model_info.assert_called()
|
||||
mock_snapshot_discovery.assert_not_called()
|
||||
|
||||
reset_mocks()
|
||||
|
|
|
@ -24,6 +24,7 @@ import warnings
|
|||
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_service import loopingcall
|
||||
from oslo_utils import excutils
|
||||
from oslo_utils import timeutils
|
||||
from oslo_utils import units
|
||||
|
@ -86,7 +87,18 @@ sf_opts = [
|
|||
'provisioning calculations. If this parameter is set to '
|
||||
'\'usedSpace\', the driver will report correct '
|
||||
'values as expected by Cinder '
|
||||
'thin provisioning.')]
|
||||
'thin provisioning.'),
|
||||
cfg.IntOpt('sf_api_request_timeout',
|
||||
default=30,
|
||||
min=30,
|
||||
help='Sets time in seconds to wait for an api request to '
|
||||
'complete.'),
|
||||
cfg.IntOpt('sf_volume_clone_timeout',
|
||||
default=600,
|
||||
min=60,
|
||||
help='Sets time in seconds to wait for a clone of a volume or '
|
||||
'snapshot to complete.'
|
||||
)]
|
||||
|
||||
CONF = cfg.CONF
|
||||
CONF.register_opts(sf_opts, group=configuration.SHARED_CONF_GROUP)
|
||||
|
@ -582,11 +594,14 @@ class SolidFireDriver(san.SanISCSIDriver):
|
|||
return endpoint
|
||||
|
||||
@retry(retry_exc_tuple, tries=6)
|
||||
def _issue_api_request(self, method, params, version='1.0', endpoint=None):
|
||||
def _issue_api_request(self, method, params, version='1.0',
|
||||
endpoint=None, timeout=None):
|
||||
if params is None:
|
||||
params = {}
|
||||
if endpoint is None:
|
||||
endpoint = self.active_cluster['endpoint']
|
||||
if not timeout:
|
||||
timeout = self.configuration.sf_api_request_timeout
|
||||
|
||||
payload = {'method': method, 'params': params}
|
||||
url = '%s/json-rpc/%s/' % (endpoint['url'], version)
|
||||
|
@ -598,7 +613,7 @@ class SolidFireDriver(san.SanISCSIDriver):
|
|||
data=json.dumps(payload),
|
||||
auth=(endpoint['login'], endpoint['passwd']),
|
||||
verify=self.verify_ssl,
|
||||
timeout=30)
|
||||
timeout=timeout)
|
||||
response = req.json()
|
||||
req.close()
|
||||
if (('error' in response) and
|
||||
|
@ -785,15 +800,13 @@ class SolidFireDriver(san.SanISCSIDriver):
|
|||
|
||||
def _get_model_info(self, sfaccount, sf_volume_id, endpoint=None):
|
||||
volume = None
|
||||
iteration_count = 0
|
||||
while not volume and iteration_count < 600:
|
||||
volume_list = self._get_volumes_by_sfaccount(
|
||||
sfaccount['accountID'], endpoint=endpoint)
|
||||
for v in volume_list:
|
||||
if v['volumeID'] == sf_volume_id:
|
||||
volume = v
|
||||
break
|
||||
iteration_count += 1
|
||||
volume_list = self._get_volumes_by_sfaccount(
|
||||
sfaccount['accountID'], endpoint=endpoint)
|
||||
|
||||
for v in volume_list:
|
||||
if v['volumeID'] == sf_volume_id:
|
||||
volume = v
|
||||
break
|
||||
|
||||
if not volume:
|
||||
LOG.error('Failed to retrieve volume SolidFire-'
|
||||
|
@ -863,10 +876,28 @@ class SolidFireDriver(san.SanISCSIDriver):
|
|||
params['volumeID'] = sf_cloned_id
|
||||
data = self._issue_api_request('ModifyVolume', params)
|
||||
|
||||
model_update = self._get_model_info(sf_account, sf_cloned_id)
|
||||
if model_update is None:
|
||||
mesg = _('Failed to get model update from clone')
|
||||
raise SolidFireAPIException(mesg)
|
||||
def _wait_volume_is_active():
|
||||
try:
|
||||
model_info = self._get_model_info(sf_account, sf_cloned_id)
|
||||
if model_info:
|
||||
raise loopingcall.LoopingCallDone(model_info)
|
||||
except exception.VolumeNotFound:
|
||||
LOG.debug('Waiting for cloned volume [%s] - [%s] to become '
|
||||
'active', sf_cloned_id, vref.id)
|
||||
pass
|
||||
|
||||
try:
|
||||
timer = loopingcall.FixedIntervalWithTimeoutLoopingCall(
|
||||
_wait_volume_is_active)
|
||||
model_update = timer.start(
|
||||
interval=1,
|
||||
timeout=self.configuration.sf_volume_clone_timeout).wait()
|
||||
except loopingcall.LoopingCallTimeOut:
|
||||
msg = (_('Failed to get model update from clone '
|
||||
'%(cloned_id)s - %(vref_id)s') %
|
||||
{'cloned_id': sf_cloned_id, 'vref_id': vref.id})
|
||||
LOG.error(msg)
|
||||
raise SolidFireAPIException(msg)
|
||||
|
||||
rep_settings = self._retrieve_replication_settings(vref)
|
||||
if self.replication_enabled and rep_settings:
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
fixes:
|
||||
- |
|
||||
`Bug #1898587 <https://bugs.launchpad.net/cinder/+bug/1898587>`_:
|
||||
Address cloning and api request timeout issues users may hit in
|
||||
certain environments, by allowing configuring timeout values for
|
||||
these operations through cinder configuration file.
|
Loading…
Reference in New Issue