Add retry logic mechanism
Added retry logic mechanism and added them to the API calls where ironic presently attempts to pull an exclusive lock on the baremetal node. Change-Id: Ia6fbc9eec612793b3214d7883e0552913a088d5d
This commit is contained in:

committed by
Monty Taylor

parent
a50a784375
commit
baef9e52bc
@@ -22,6 +22,7 @@ import re
|
|||||||
import six
|
import six
|
||||||
import sre_constants
|
import sre_constants
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from decorator import decorator
|
from decorator import decorator
|
||||||
@@ -486,6 +487,60 @@ def safe_dict_max(key, data):
|
|||||||
return max_value
|
return max_value
|
||||||
|
|
||||||
|
|
||||||
|
def _call_client_and_retry(client, url, retry_on=None,
|
||||||
|
call_retries=3, retry_wait=2,
|
||||||
|
**kwargs):
|
||||||
|
"""Method to provide retry operations.
|
||||||
|
|
||||||
|
Some APIs utilize HTTP errors on certian operations to indicate that
|
||||||
|
the resource is presently locked, and as such this mechanism provides
|
||||||
|
the ability to retry upon known error codes.
|
||||||
|
|
||||||
|
:param object client: The client method, such as:
|
||||||
|
``self.baremetal_client.post``
|
||||||
|
:param string url: The URL to perform the operation upon.
|
||||||
|
:param integer retry_on: A list of error codes that can be retried on.
|
||||||
|
The method also supports a single integer to be
|
||||||
|
defined.
|
||||||
|
:param integer call_retries: The number of times to retry the call upon
|
||||||
|
the error code defined by the 'retry_on'
|
||||||
|
parameter. Default: 3
|
||||||
|
:param integer retry_wait: The time in seconds to wait between retry
|
||||||
|
attempts. Default: 2
|
||||||
|
|
||||||
|
:returns: The object returned by the client call.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# NOTE(TheJulia): This method, as of this note, does not have direct
|
||||||
|
# unit tests, although is fairly well tested by the tests checking
|
||||||
|
# retry logic in test_baremetal_node.py.
|
||||||
|
log = _log.setup_logging('shade.http')
|
||||||
|
|
||||||
|
if isinstance(retry_on, int):
|
||||||
|
retry_on = [retry_on]
|
||||||
|
|
||||||
|
count = 0
|
||||||
|
while (count < call_retries):
|
||||||
|
count += 1
|
||||||
|
try:
|
||||||
|
ret_val = client(url, **kwargs)
|
||||||
|
except exc.OpenStackCloudHTTPError as e:
|
||||||
|
if (retry_on is not None and
|
||||||
|
e.response.status_code in retry_on):
|
||||||
|
log.debug('Received retryable error {err}, waiting '
|
||||||
|
'{wait} seconds to retry', {
|
||||||
|
'err': e.response.status_code,
|
||||||
|
'wait': retry_wait
|
||||||
|
})
|
||||||
|
time.sleep(retry_wait)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
# Break out of the loop, since the loop should only continue
|
||||||
|
# when we encounter a known connection error.
|
||||||
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
def parse_range(value):
|
def parse_range(value):
|
||||||
"""Parse a numerical range string.
|
"""Parse a numerical range string.
|
||||||
|
|
||||||
|
@@ -9187,7 +9187,9 @@ class OpenStackCloud(_normalize.Normalizer):
|
|||||||
port = self._baremetal_client.get(port_url, microversion=1.6,
|
port = self._baremetal_client.get(port_url, microversion=1.6,
|
||||||
error_message=port_msg)
|
error_message=port_msg)
|
||||||
port_url = '/ports/{uuid}'.format(uuid=port['ports'][0]['uuid'])
|
port_url = '/ports/{uuid}'.format(uuid=port['ports'][0]['uuid'])
|
||||||
self._baremetal_client.delete(port_url, error_message=port_msg)
|
_utils._call_client_and_retry(self._baremetal_client.delete,
|
||||||
|
port_url, retry_on=[409, 503],
|
||||||
|
error_message=port_msg)
|
||||||
|
|
||||||
with _utils.shade_exceptions(
|
with _utils.shade_exceptions(
|
||||||
"Error unregistering machine {node_id} from the baremetal "
|
"Error unregistering machine {node_id} from the baremetal "
|
||||||
@@ -9199,10 +9201,11 @@ class OpenStackCloud(_normalize.Normalizer):
|
|||||||
# microversions in future releases, as such, we explicitly set
|
# microversions in future releases, as such, we explicitly set
|
||||||
# the version to what we have been using with the client library..
|
# the version to what we have been using with the client library..
|
||||||
version = "1.6"
|
version = "1.6"
|
||||||
msg = "Baremetal machine failed to be deleted."
|
msg = "Baremetal machine failed to be deleted"
|
||||||
url = '/nodes/{node_id}'.format(
|
url = '/nodes/{node_id}'.format(
|
||||||
node_id=uuid)
|
node_id=uuid)
|
||||||
self._baremetal_client.delete(url,
|
_utils._call_client_and_retry(self._baremetal_client.delete,
|
||||||
|
url, retry_on=[409, 503],
|
||||||
error_message=msg,
|
error_message=msg,
|
||||||
microversion=version)
|
microversion=version)
|
||||||
|
|
||||||
@@ -9428,10 +9431,12 @@ class OpenStackCloud(_normalize.Normalizer):
|
|||||||
if configdrive:
|
if configdrive:
|
||||||
payload['configdrive'] = configdrive
|
payload['configdrive'] = configdrive
|
||||||
|
|
||||||
machine = self._baremetal_client.put(url,
|
machine = _utils._call_client_and_retry(self._baremetal_client.put,
|
||||||
json=payload,
|
url,
|
||||||
error_message=msg,
|
retry_on=[409, 503],
|
||||||
microversion=version)
|
json=payload,
|
||||||
|
error_message=msg,
|
||||||
|
microversion=version)
|
||||||
if wait:
|
if wait:
|
||||||
for count in utils.iterate_timeout(
|
for count in utils.iterate_timeout(
|
||||||
timeout,
|
timeout,
|
||||||
@@ -9532,10 +9537,12 @@ class OpenStackCloud(_normalize.Normalizer):
|
|||||||
else:
|
else:
|
||||||
desired_state = 'power {state}'.format(state=state)
|
desired_state = 'power {state}'.format(state=state)
|
||||||
payload = {'target': desired_state}
|
payload = {'target': desired_state}
|
||||||
self._baremetal_client.put(url,
|
_utils._call_client_and_retry(self._baremetal_client.put,
|
||||||
json=payload,
|
url,
|
||||||
error_message=msg,
|
retry_on=[409, 503],
|
||||||
microversion="1.6")
|
json=payload,
|
||||||
|
error_message=msg,
|
||||||
|
microversion="1.6")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def set_machine_power_on(self, name_or_id):
|
def set_machine_power_on(self, name_or_id):
|
||||||
|
@@ -555,6 +555,40 @@ class TestBaremetalNode(base.IronicTestCase):
|
|||||||
|
|
||||||
self.assert_calls()
|
self.assert_calls()
|
||||||
|
|
||||||
|
def test_set_machine_power_on_with_retires(self):
|
||||||
|
# NOTE(TheJulia): This logic ends up testing power on/off and reboot
|
||||||
|
# as they all utilize the same helper method.
|
||||||
|
self.register_uris([
|
||||||
|
dict(
|
||||||
|
method='PUT',
|
||||||
|
status_code=503,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid'],
|
||||||
|
'states', 'power']),
|
||||||
|
validate=dict(json={'target': 'power on'})),
|
||||||
|
dict(
|
||||||
|
method='PUT',
|
||||||
|
status_code=409,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid'],
|
||||||
|
'states', 'power']),
|
||||||
|
validate=dict(json={'target': 'power on'})),
|
||||||
|
dict(
|
||||||
|
method='PUT',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid'],
|
||||||
|
'states', 'power']),
|
||||||
|
validate=dict(json={'target': 'power on'})),
|
||||||
|
])
|
||||||
|
return_value = self.cloud.set_machine_power_on(
|
||||||
|
self.fake_baremetal_node['uuid'])
|
||||||
|
self.assertIsNone(return_value)
|
||||||
|
|
||||||
|
self.assert_calls()
|
||||||
|
|
||||||
def test_set_machine_power_off(self):
|
def test_set_machine_power_off(self):
|
||||||
self.register_uris([
|
self.register_uris([
|
||||||
dict(
|
dict(
|
||||||
@@ -632,6 +666,51 @@ class TestBaremetalNode(base.IronicTestCase):
|
|||||||
|
|
||||||
self.assert_calls()
|
self.assert_calls()
|
||||||
|
|
||||||
|
def test_node_set_provision_state_with_retries(self):
|
||||||
|
deploy_node = self.fake_baremetal_node.copy()
|
||||||
|
deploy_node['provision_state'] = 'deploying'
|
||||||
|
active_node = self.fake_baremetal_node.copy()
|
||||||
|
active_node['provision_state'] = 'active'
|
||||||
|
self.register_uris([
|
||||||
|
dict(
|
||||||
|
method='PUT',
|
||||||
|
status_code=409,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid'],
|
||||||
|
'states', 'provision']),
|
||||||
|
validate=dict(json={'target': 'active',
|
||||||
|
'configdrive': 'http://host/file'})),
|
||||||
|
dict(
|
||||||
|
method='PUT',
|
||||||
|
status_code=503,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid'],
|
||||||
|
'states', 'provision']),
|
||||||
|
validate=dict(json={'target': 'active',
|
||||||
|
'configdrive': 'http://host/file'})),
|
||||||
|
dict(
|
||||||
|
method='PUT',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid'],
|
||||||
|
'states', 'provision']),
|
||||||
|
validate=dict(json={'target': 'active',
|
||||||
|
'configdrive': 'http://host/file'})),
|
||||||
|
dict(method='GET',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid']]),
|
||||||
|
json=self.fake_baremetal_node),
|
||||||
|
])
|
||||||
|
self.cloud.node_set_provision_state(
|
||||||
|
self.fake_baremetal_node['uuid'],
|
||||||
|
'active',
|
||||||
|
configdrive='http://host/file')
|
||||||
|
|
||||||
|
self.assert_calls()
|
||||||
|
|
||||||
def test_node_set_provision_state_wait_timeout(self):
|
def test_node_set_provision_state_wait_timeout(self):
|
||||||
deploy_node = self.fake_baremetal_node.copy()
|
deploy_node = self.fake_baremetal_node.copy()
|
||||||
deploy_node['provision_state'] = 'deploying'
|
deploy_node['provision_state'] = 'deploying'
|
||||||
@@ -1410,6 +1489,64 @@ class TestBaremetalNode(base.IronicTestCase):
|
|||||||
timeout=0.001)
|
timeout=0.001)
|
||||||
self.assert_calls()
|
self.assert_calls()
|
||||||
|
|
||||||
|
def test_unregister_machine_retries(self):
|
||||||
|
mac_address = self.fake_baremetal_port['address']
|
||||||
|
nics = [{'mac': mac_address}]
|
||||||
|
port_uuid = self.fake_baremetal_port['uuid']
|
||||||
|
# NOTE(TheJulia): The two values below should be the same.
|
||||||
|
port_node_uuid = self.fake_baremetal_port['node_uuid']
|
||||||
|
port_url_address = 'detail?address=%s' % mac_address
|
||||||
|
self.fake_baremetal_node['provision_state'] = 'available'
|
||||||
|
self.register_uris([
|
||||||
|
dict(
|
||||||
|
method='GET',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid']]),
|
||||||
|
json=self.fake_baremetal_node),
|
||||||
|
dict(
|
||||||
|
method='GET',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='ports',
|
||||||
|
append=[port_url_address]),
|
||||||
|
json={'ports': [{'address': mac_address,
|
||||||
|
'node_uuid': port_node_uuid,
|
||||||
|
'uuid': port_uuid}]}),
|
||||||
|
dict(
|
||||||
|
method='DELETE',
|
||||||
|
status_code=503,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='ports',
|
||||||
|
append=[self.fake_baremetal_port['uuid']])),
|
||||||
|
dict(
|
||||||
|
method='DELETE',
|
||||||
|
status_code=409,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='ports',
|
||||||
|
append=[self.fake_baremetal_port['uuid']])),
|
||||||
|
dict(
|
||||||
|
method='DELETE',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='ports',
|
||||||
|
append=[self.fake_baremetal_port['uuid']])),
|
||||||
|
dict(
|
||||||
|
method='DELETE',
|
||||||
|
status_code=409,
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid']])),
|
||||||
|
dict(
|
||||||
|
method='DELETE',
|
||||||
|
uri=self.get_mock_url(
|
||||||
|
resource='nodes',
|
||||||
|
append=[self.fake_baremetal_node['uuid']])),
|
||||||
|
])
|
||||||
|
|
||||||
|
self.cloud.unregister_machine(
|
||||||
|
nics, self.fake_baremetal_node['uuid'])
|
||||||
|
|
||||||
|
self.assert_calls()
|
||||||
|
|
||||||
def test_unregister_machine_unavailable(self):
|
def test_unregister_machine_unavailable(self):
|
||||||
# This is a list of invalid states that the method
|
# This is a list of invalid states that the method
|
||||||
# should fail on.
|
# should fail on.
|
||||||
|
Reference in New Issue
Block a user