Retry heat stack polling on gateway problems

If the Heat API is overloaded or temporarily unavailable, we might get a
503 or 504 from haproxy during the deployment. We should retry polling
for events in this case so as not to exit the deployment prematurely.

Change-Id: I947cd0f9bf4a97e46c3d2bf3e9b986f7d38e9357
Closes-Bug: #1833452
This commit is contained in:
Alex Schultz
2019-06-19 13:45:23 -06:00
parent 4bfd472b38
commit cb42cfe30f
2 changed files with 81 additions and 6 deletions

View File

@@ -418,7 +418,8 @@ def create_tempest_deployer_input(config_name='tempest-deployer-input.conf'):
def wait_for_stack_ready(orchestration_client, stack_name, marker=None,
action='CREATE', verbose=False):
action='CREATE', verbose=False, poll_period=5,
nested_depth=2, max_retries=10):
"""Check the status of an orchestration stack
Get the status of an orchestration stack and check whether it is complete
@@ -438,7 +439,17 @@ def wait_for_stack_ready(orchestration_client, stack_name, marker=None,
:param verbose: Whether to print events
:type verbose: boolean
:param nested_depth: Max depth to look for events
:type nested_depth: int
:param poll_period: How often to poll for events
:type poll_period: int
:param max_retries: Number of retries in the case of server problems
:type max_retries: int
"""
log = logging.getLogger(__name__ + ".wait_for_stack_ready")
stack = get_stack(orchestration_client, stack_name)
if not stack:
return False
@@ -448,11 +459,27 @@ def wait_for_stack_ready(orchestration_client, stack_name, marker=None,
out = sys.stdout
else:
out = open(os.devnull, "w")
stack_status, msg = event_utils.poll_for_events(
orchestration_client, stack_name, action=action,
poll_period=5, marker=marker, out=out, nested_depth=2)
print(msg)
return stack_status == '%s_COMPLETE' % action
retries = 0
while retries <= max_retries:
try:
stack_status, msg = event_utils.poll_for_events(
orchestration_client, stack_name, action=action,
poll_period=5, marker=marker, out=out,
nested_depth=nested_depth)
print(msg)
return stack_status == '%s_COMPLETE' % action
except hc_exc.HTTPException as e:
if e.code in [503, 504]:
retries += 1
log.warning("Server issue while waiting for stack to be ready."
" Attempting retry {} of {}".format(retries,
max_retries))
time.sleep(retries * 5)
continue
log.error("Error occured while waiting for stack to be ready.")
raise e
raise RuntimeError(
"wait_for_stack_ready: Max retries {} reached".format(max_retries))
def wait_for_provision_state(baremetal_client, node_uuid, provision_state,