NetApp SolidFire: Fix clone and request timeout issues
Users are experiencing timeout issues in certain environments, mostly when volumes are too big (i.e., multi-terabyte volumes), due to poor network performance or upgrade issues that revolve around the SolidFire cluster. A viable solution is to make the driver timeout values configurable in cinder.conf, so users can set these timeouts according to their needs. This patch adds two timeout settings to the SolidFire driver (one for the cloning operation and one applied globally to all API requests), to allow users to set the appropriate timeouts for their environment. Closes-Bug: #1898587 Change-Id: Ie330c76a5db0ea76d4fed5a6ae7b8736dadc8591
This commit is contained in:
parent
11fa011c86
commit
c607a82a99
@ -221,7 +221,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
||||
'volumeID': 6}]
|
||||
|
||||
def fake_issue_api_request(self, method, params, version='1.0',
|
||||
endpoint=None):
|
||||
endpoint=None, timeout=None):
|
||||
if method == 'GetClusterCapacity':
|
||||
data = {}
|
||||
if version == '1.0':
|
||||
@ -638,6 +638,12 @@ class SolidFireVolumeTestCase(test.TestCase):
|
||||
'volume_type_id': None,
|
||||
'created_at': timeutils.utcnow()}
|
||||
|
||||
fake_model_info = {
|
||||
'provider_id': '%s %s cluster-id-01' % (
|
||||
self.fake_sfvol['volumeID'],
|
||||
self.fake_sfaccount['accountID'])
|
||||
}
|
||||
|
||||
ctx = context.get_admin_context()
|
||||
testvol = fake_volume.fake_volume_obj(ctx, **updates_vol_a)
|
||||
testvol_b = fake_volume.fake_volume_obj(ctx, **updates_vol_b)
|
||||
@ -657,7 +663,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
||||
return_value=[]), \
|
||||
mock.patch.object(sfv,
|
||||
'_get_model_info',
|
||||
return_value={}):
|
||||
return_value=fake_model_info):
|
||||
sfv.create_cloned_volume(testvol_b, testvol)
|
||||
|
||||
def test_initialize_connector_with_blocksizes(self):
|
||||
@ -3041,6 +3047,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
||||
'mvip': self.mvip,
|
||||
'svip': self.svip}
|
||||
|
||||
self.configuration.sf_volume_clone_timeout = 1
|
||||
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
|
||||
sfv.replication_enabled = False
|
||||
|
||||
@ -3085,7 +3092,7 @@ class SolidFireVolumeTestCase(test.TestCase):
|
||||
mock_issue_api_request.assert_has_calls(calls)
|
||||
mock_test_set_cluster_pairs.assert_not_called()
|
||||
mock_update_attributes.assert_not_called()
|
||||
mock_get_model_info.assert_called_once()
|
||||
mock_get_model_info.assert_called()
|
||||
mock_snapshot_discovery.assert_not_called()
|
||||
|
||||
reset_mocks()
|
||||
|
@ -100,7 +100,18 @@ sf_opts = [
|
||||
default=3600,
|
||||
min=30,
|
||||
help='Sets time in seconds to wait for a migrating volume to '
|
||||
'complete pairing and sync.')]
|
||||
'complete pairing and sync.'),
|
||||
cfg.IntOpt('sf_api_request_timeout',
|
||||
default=30,
|
||||
min=30,
|
||||
help='Sets time in seconds to wait for an api request to '
|
||||
'complete.'),
|
||||
cfg.IntOpt('sf_volume_clone_timeout',
|
||||
default=600,
|
||||
min=60,
|
||||
help='Sets time in seconds to wait for a clone of a volume or '
|
||||
'snapshot to complete.'
|
||||
)]
|
||||
|
||||
CONF = cfg.CONF
|
||||
CONF.register_opts(sf_opts, group=configuration.SHARED_CONF_GROUP)
|
||||
@ -656,11 +667,14 @@ class SolidFireDriver(san.SanISCSIDriver):
|
||||
return endpoint
|
||||
|
||||
@retry(retry_exc_tuple, tries=6)
|
||||
def _issue_api_request(self, method, params, version='1.0', endpoint=None):
|
||||
def _issue_api_request(self, method, params, version='1.0',
|
||||
endpoint=None, timeout=None):
|
||||
if params is None:
|
||||
params = {}
|
||||
if endpoint is None:
|
||||
endpoint = self.active_cluster['endpoint']
|
||||
if not timeout:
|
||||
timeout = self.configuration.sf_api_request_timeout
|
||||
|
||||
payload = {'method': method, 'params': params}
|
||||
url = '%s/json-rpc/%s/' % (endpoint['url'], version)
|
||||
@ -672,7 +686,7 @@ class SolidFireDriver(san.SanISCSIDriver):
|
||||
data=json.dumps(payload),
|
||||
auth=(endpoint['login'], endpoint['passwd']),
|
||||
verify=self.verify_ssl,
|
||||
timeout=30)
|
||||
timeout=timeout)
|
||||
response = req.json()
|
||||
req.close()
|
||||
if (('error' in response) and
|
||||
@ -859,15 +873,13 @@ class SolidFireDriver(san.SanISCSIDriver):
|
||||
|
||||
def _get_model_info(self, sfaccount, sf_volume_id, endpoint=None):
|
||||
volume = None
|
||||
iteration_count = 0
|
||||
while not volume and iteration_count < 600:
|
||||
volume_list = self._get_volumes_by_sfaccount(
|
||||
sfaccount['accountID'], endpoint=endpoint)
|
||||
for v in volume_list:
|
||||
if v['volumeID'] == sf_volume_id:
|
||||
volume = v
|
||||
break
|
||||
iteration_count += 1
|
||||
volume_list = self._get_volumes_by_sfaccount(
|
||||
sfaccount['accountID'], endpoint=endpoint)
|
||||
|
||||
for v in volume_list:
|
||||
if v['volumeID'] == sf_volume_id:
|
||||
volume = v
|
||||
break
|
||||
|
||||
if not volume:
|
||||
LOG.error('Failed to retrieve volume SolidFire-'
|
||||
@ -937,10 +949,27 @@ class SolidFireDriver(san.SanISCSIDriver):
|
||||
params['volumeID'] = sf_cloned_id
|
||||
data = self._issue_api_request('ModifyVolume', params)
|
||||
|
||||
model_update = self._get_model_info(sf_account, sf_cloned_id)
|
||||
if model_update is None:
|
||||
mesg = _('Failed to get model update from clone')
|
||||
raise SolidFireAPIException(mesg)
|
||||
def _wait_volume_is_active():
|
||||
try:
|
||||
model_info = self._get_model_info(sf_account, sf_cloned_id)
|
||||
if model_info:
|
||||
raise loopingcall.LoopingCallDone(model_info)
|
||||
except exception.VolumeNotFound:
|
||||
LOG.debug('Waiting for cloned volume [%s] - [%s] to become '
|
||||
'active', sf_cloned_id, vref.id)
|
||||
pass
|
||||
|
||||
try:
|
||||
timer = loopingcall.FixedIntervalWithTimeoutLoopingCall(
|
||||
_wait_volume_is_active)
|
||||
model_update = timer.start(
|
||||
interval=1,
|
||||
timeout=self.configuration.sf_volume_clone_timeout).wait()
|
||||
except loopingcall.LoopingCallTimeOut:
|
||||
msg = _('Failed to get model update from clone [%s] - [%s]' %
|
||||
(sf_cloned_id, vref.id))
|
||||
LOG.error(msg)
|
||||
raise SolidFireAPIException(msg)
|
||||
|
||||
rep_settings = self._retrieve_replication_settings(vref)
|
||||
if self.replication_enabled and rep_settings:
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
`Bug #1898587 <https://bugs.launchpad.net/cinder/+bug/1898587>`_:
|
||||
Address cloning and API request timeout issues users may hit in
|
||||
certain environments, by allowing timeout values to be configured for
|
||||
these operations through the cinder configuration file.
|
Loading…
Reference in New Issue
Block a user