Merge "Remediate releases stuck in non-DEPLOYED statuses"
This commit is contained in:
commit
52f29ddf73
|
@ -31,14 +31,15 @@ class ArmadaTimeoutException(ArmadaException):
|
||||||
|
|
||||||
class ProtectedReleaseException(ArmadaException):
    '''
    Raised when a release designated `protected` in the Chart is found with
    a status other than DEPLOYED and `continue_processing` is False.

    :param release: name of the protected release
    :param status: status the release was found in
    '''

    def __init__(self, release, status):
        # Keep the rendered text on the instance before delegating to the
        # base exception.
        template = 'Armada encountered protected release {} in {} status'
        self._message = template.format(release, status)
        super(ProtectedReleaseException, self).__init__(self._message)
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,13 +89,16 @@ class WaitException(ArmadaException):
|
||||||
super(WaitException, self).__init__(message)
|
super(WaitException, self).__init__(message)
|
||||||
|
|
||||||
|
|
||||||
class DeploymentLikelyPendingException(ArmadaException):
    '''
    Raised when an operation on an existing release (e.g. install, update,
    rollback, delete) is detected as likely still pending.

    :param release: name of the release
    :param status: status the release was found in
    :param last_deployment_age: seconds since the release's last deployment
    :param timeout: chart wait timeout, in seconds
    '''

    def __init__(self, release, status, last_deployment_age, timeout):
        # Adjacent literals are concatenated into one template, so a single
        # format() call fills all four placeholders.
        template = ('Existing deployment likely pending '
                    'release={}, status={}, '
                    '(last deployment age={}s) < (chart wait timeout={}s)')
        self._message = template.format(
            release, status, last_deployment_age, timeout)
        super(DeploymentLikelyPendingException, self).__init__(self._message)
|
||||||
|
|
|
@ -52,19 +52,12 @@ class ChartDeploy(object):
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
protected = chart.get('protected', {})
|
|
||||||
p_continue = protected.get('continue_processing', False)
|
|
||||||
|
|
||||||
old_release = self.find_chart_release(known_releases, release_name)
|
old_release = self.find_chart_release(known_releases, release_name)
|
||||||
|
|
||||||
status = None
|
status = None
|
||||||
if old_release:
|
if old_release:
|
||||||
status = r.get_release_status(old_release)
|
status = r.get_release_status(old_release)
|
||||||
|
|
||||||
if status not in [const.STATUS_FAILED, const.STATUS_DEPLOYED]:
|
|
||||||
raise armada_exceptions.UnexpectedReleaseStatusException(
|
|
||||||
release_name, status)
|
|
||||||
|
|
||||||
chart_wait = ChartWait(
|
chart_wait = ChartWait(
|
||||||
self.tiller.k8s,
|
self.tiller.k8s,
|
||||||
release_name,
|
release_name,
|
||||||
|
@ -82,29 +75,6 @@ class ChartDeploy(object):
|
||||||
chartbuilder = ChartBuilder(chart)
|
chartbuilder = ChartBuilder(chart)
|
||||||
new_chart = chartbuilder.get_helm_chart()
|
new_chart = chartbuilder.get_helm_chart()
|
||||||
|
|
||||||
# Check for existing FAILED release, and purge
|
|
||||||
if status == const.STATUS_FAILED:
|
|
||||||
LOG.info('Purging FAILED release %s before deployment.',
|
|
||||||
release_name)
|
|
||||||
if protected:
|
|
||||||
if p_continue:
|
|
||||||
LOG.warn(
|
|
||||||
'Release %s is `protected`, '
|
|
||||||
'continue_processing=True. Operator must '
|
|
||||||
'handle FAILED release manually.', release_name)
|
|
||||||
result['protected'] = release_name
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
LOG.error(
|
|
||||||
'Release %s is `protected`, '
|
|
||||||
'continue_processing=False.', release_name)
|
|
||||||
raise armada_exceptions.ProtectedReleaseException(
|
|
||||||
release_name)
|
|
||||||
else:
|
|
||||||
# Purge the release
|
|
||||||
self.tiller.uninstall_release(release_name)
|
|
||||||
result['purge'] = release_name
|
|
||||||
|
|
||||||
# TODO(mark-burnett): It may be more robust to directly call
|
# TODO(mark-burnett): It may be more robust to directly call
|
||||||
# tiller status to decide whether to install/upgrade rather
|
# tiller status to decide whether to install/upgrade rather
|
||||||
# than checking for list membership.
|
# than checking for list membership.
|
||||||
|
@ -181,6 +151,62 @@ class ChartDeploy(object):
|
||||||
tiller_result.__dict__)
|
tiller_result.__dict__)
|
||||||
result['upgrade'] = release_name
|
result['upgrade'] = release_name
|
||||||
else:
|
else:
|
||||||
|
# Check for release with status other than DEPLOYED
|
||||||
|
if status:
|
||||||
|
if status != const.STATUS_FAILED:
|
||||||
|
LOG.warn(
|
||||||
|
'Unexpected release status encountered '
|
||||||
|
'release=%s, status=%s', release_name, status)
|
||||||
|
|
||||||
|
# Make best effort to determine whether a deployment is
|
||||||
|
# likely pending, by checking if the last deployment
|
||||||
|
# was started within the timeout window of the chart.
|
||||||
|
last_deployment_age = r.get_last_deployment_age(
|
||||||
|
old_release)
|
||||||
|
wait_timeout = chart_wait.get_timeout()
|
||||||
|
likely_pending = last_deployment_age <= wait_timeout
|
||||||
|
if likely_pending:
|
||||||
|
# Give up if a deployment is likely pending, we do not
|
||||||
|
# want to have multiple operations going on for the
|
||||||
|
# same release at the same time.
|
||||||
|
raise armada_exceptions.\
|
||||||
|
DeploymentLikelyPendingException(
|
||||||
|
release_name, status, last_deployment_age,
|
||||||
|
wait_timeout)
|
||||||
|
else:
|
||||||
|
# Release is likely stuck in an unintended (by tiller)
|
||||||
|
# state. Log and continue on with remediation steps
|
||||||
|
# below.
|
||||||
|
LOG.info(
|
||||||
|
'Old release %s likely stuck in status %s, '
|
||||||
|
'(last deployment age=%ss) >= '
|
||||||
|
'(chart wait timeout=%ss)', release, status,
|
||||||
|
last_deployment_age, wait_timeout)
|
||||||
|
|
||||||
|
protected = chart.get('protected', {})
|
||||||
|
if protected:
|
||||||
|
p_continue = protected.get('continue_processing', False)
|
||||||
|
if p_continue:
|
||||||
|
LOG.warn(
|
||||||
|
'Release %s is `protected`, '
|
||||||
|
'continue_processing=True. Operator must '
|
||||||
|
'handle %s release manually.', release_name,
|
||||||
|
status)
|
||||||
|
result['protected'] = release_name
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
LOG.error(
|
||||||
|
'Release %s is `protected`, '
|
||||||
|
'continue_processing=False.', release_name)
|
||||||
|
raise armada_exceptions.ProtectedReleaseException(
|
||||||
|
release_name, status)
|
||||||
|
else:
|
||||||
|
# Purge the release
|
||||||
|
LOG.info('Purging release %s with status %s', release_name,
|
||||||
|
status)
|
||||||
|
self.tiller.uninstall_release(release_name)
|
||||||
|
result['purge'] = release_name
|
||||||
|
|
||||||
timer = int(round(deadline - time.time()))
|
timer = int(round(deadline - time.time()))
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Installing release %s in namespace %s, wait=%s, "
|
"Installing release %s in namespace %s, wait=%s, "
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
|
|
||||||
from armada.handlers.test import Test
|
from armada.handlers.test import Test
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
def release_prefixer(prefix, release):
|
def release_prefixer(prefix, release):
|
||||||
'''
|
'''
|
||||||
|
@ -53,3 +55,17 @@ def get_last_test_result(release):
|
||||||
if not status.HasField('last_test_suite_run'):
|
if not status.HasField('last_test_suite_run'):
|
||||||
return None
|
return None
|
||||||
return Test.get_test_suite_run_success(status.last_test_suite_run)
|
return Test.get_test_suite_run_success(status.last_test_suite_run)
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_deployment_age(release):
    '''
    Compute how long ago the given release was last deployed.

    The result is the current wall-clock time, truncated to whole seconds,
    minus the `seconds` field of the release's `last_deployed` timestamp.

    :param release: protobuf release object

    :return: age in seconds of last deployment of release
    '''

    deployed_at = release.info.last_deployed.seconds
    return int(time.time()) - deployed_at
|
||||||
|
|
|
@ -47,7 +47,7 @@ Armada Exceptions
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
|
|
||||||
.. autoexception:: armada.exceptions.armada_exceptions.UnexpectedReleaseStatusException
|
.. autoexception:: armada.exceptions.armada_exceptions.DeploymentLikelyPendingException
|
||||||
:members:
|
:members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
|
|
Loading…
Reference in New Issue