Retry heat stack polling on gateway problems

If the Heat API is overloaded or temporarily unavailable, we might get a
503 or 504 from HAProxy during the deployment. We should retry polling
for events in this case so as not to exit the deployment prematurely.

Change-Id: I947cd0f9bf4a97e46c3d2bf3e9b986f7d38e9357
Closes-Bug: #1833452
This commit is contained in:
Alex Schultz
2019-06-19 13:45:23 -06:00
parent 4bfd472b38
commit cb42cfe30f
2 changed files with 81 additions and 6 deletions

View File

@@ -376,6 +376,54 @@ class TestWaitForStackUtil(TestCase):
complete = utils.wait_for_stack_ready(self.mock_orchestration, 'stack') complete = utils.wait_for_stack_ready(self.mock_orchestration, 'stack')
self.assertTrue(complete) self.assertTrue(complete)
@mock.patch("time.sleep")
@mock.patch("heatclient.common.event_utils.poll_for_events")
@mock.patch("tripleoclient.utils.get_stack")
def test_wait_for_stack_ready_retry(self, mock_get_stack, mock_poll,
                                    mock_time):
    # Scenario: the first poll raises an HTTP 504, the second one reports
    # CREATE_COMPLETE, so the call is expected to end up returning True.
    # Patches are injected bottom-up: get_stack, poll_for_events, sleep.
    mock_get_stack.return_value = mock.Mock(
        stack_name='stack',
        stack_id='id',
        stack_status="CREATE_COMPLETE")
    gateway_timeout = hc_exc.HTTPException(code=504)
    poll_success = ("CREATE_COMPLETE", "ready retry message")
    mock_poll.side_effect = [gateway_timeout, poll_success]

    result = utils.wait_for_stack_ready(self.mock_orchestration, 'stack')

    self.assertTrue(result)
@mock.patch("time.sleep")
@mock.patch("heatclient.common.event_utils.poll_for_events")
@mock.patch("tripleoclient.utils.get_stack")
def test_wait_for_stack_ready_retry_fail(self, mock_get_stack, mock_poll,
                                         mock_time):
    # Scenario: every poll attempt raises an HTTP 504, so the call is
    # expected to give up with a RuntimeError. time.sleep is patched out.
    mock_get_stack.return_value = mock.Mock(
        stack_name='stack',
        stack_id='id',
        stack_status="CREATE_COMPLETE")
    # A single exception instance as side_effect is raised on each call.
    mock_poll.side_effect = hc_exc.HTTPException(code=504)

    with self.assertRaises(RuntimeError):
        utils.wait_for_stack_ready(self.mock_orchestration, 'stack')
@mock.patch("time.sleep")
@mock.patch("heatclient.common.event_utils.poll_for_events")
@mock.patch("tripleoclient.utils.get_stack")
def test_wait_for_stack_ready_server_fail(self, mock_get_stack, mock_poll,
                                          mock_time):
    # Scenario: polling raises an HTTP 500; the call is expected to let
    # the HTTPException propagate to the caller.
    mock_get_stack.return_value = mock.Mock(
        stack_name='stack',
        stack_id='id',
        stack_status="CREATE_COMPLETE")
    mock_poll.side_effect = hc_exc.HTTPException(code=500)

    with self.assertRaises(hc_exc.HTTPException):
        utils.wait_for_stack_ready(self.mock_orchestration, 'stack')
def test_wait_for_stack_ready_no_stack(self): def test_wait_for_stack_ready_no_stack(self):
self.mock_orchestration.stacks.get.return_value = None self.mock_orchestration.stacks.get.return_value = None

View File

@@ -418,7 +418,8 @@ def create_tempest_deployer_input(config_name='tempest-deployer-input.conf'):
def wait_for_stack_ready(orchestration_client, stack_name, marker=None, def wait_for_stack_ready(orchestration_client, stack_name, marker=None,
action='CREATE', verbose=False): action='CREATE', verbose=False, poll_period=5,
nested_depth=2, max_retries=10):
"""Check the status of an orchestration stack """Check the status of an orchestration stack
Get the status of an orchestration stack and check whether it is complete Get the status of an orchestration stack and check whether it is complete
@@ -438,7 +439,17 @@ def wait_for_stack_ready(orchestration_client, stack_name, marker=None,
:param verbose: Whether to print events :param verbose: Whether to print events
:type verbose: boolean :type verbose: boolean
:param nested_depth: Max depth to look for events
:type nested_depth: int
:param poll_period: How often to poll for events
:type poll_period: int
:param max_retries: Number of retries in the case of server problems
:type max_retries: int
""" """
log = logging.getLogger(__name__ + ".wait_for_stack_ready")
stack = get_stack(orchestration_client, stack_name) stack = get_stack(orchestration_client, stack_name)
if not stack: if not stack:
return False return False
@@ -448,11 +459,27 @@ def wait_for_stack_ready(orchestration_client, stack_name, marker=None,
out = sys.stdout out = sys.stdout
else: else:
out = open(os.devnull, "w") out = open(os.devnull, "w")
stack_status, msg = event_utils.poll_for_events( retries = 0
orchestration_client, stack_name, action=action, while retries <= max_retries:
poll_period=5, marker=marker, out=out, nested_depth=2) try:
print(msg) stack_status, msg = event_utils.poll_for_events(
return stack_status == '%s_COMPLETE' % action orchestration_client, stack_name, action=action,
poll_period=5, marker=marker, out=out,
nested_depth=nested_depth)
print(msg)
return stack_status == '%s_COMPLETE' % action
except hc_exc.HTTPException as e:
if e.code in [503, 504]:
retries += 1
log.warning("Server issue while waiting for stack to be ready."
" Attempting retry {} of {}".format(retries,
max_retries))
time.sleep(retries * 5)
continue
log.error("Error occured while waiting for stack to be ready.")
raise e
raise RuntimeError(
"wait_for_stack_ready: Max retries {} reached".format(max_retries))
def wait_for_provision_state(baremetal_client, node_uuid, provision_state, def wait_for_provision_state(baremetal_client, node_uuid, provision_state,