Implement `protected` parameter

The `protected` parameter will be used to signify that we should
never purge a release in FAILED status. You can specify the
`continue_processing` param to either skip the failed release and
continue on, or to halt armada execution immediately.

- Add protected param to Chart schema and documentation.
- Implement protection logic.
- Moved purging of FAILED releases out of pre-flight and into sync
  for finer control over protected params. This means failed
  releases are now purged one at a time instead of all up front.
- Added purge and protected charts to final client `msg` return.

- Fix: Added missing dry-run protection in tiller delete resources.

Change-Id: Ia893a486d22cc1022b542ab7c22f58af12025523
This commit is contained in:
Marshall Margenau 2018-06-15 01:07:34 -05:00
parent ae66202a6a
commit 6546139155
8 changed files with 279 additions and 111 deletions

View File

@ -190,6 +190,10 @@ class ApplyManifest(CliAction):
for ch in resp[result]:
if not result == 'diff':
msg = 'Chart {} took action: {}'.format(ch, result)
if result == 'protected':
msg += ' and requires operator attention.'
elif result == 'purge':
msg += ' before install/upgrade.'
self.logger.info(msg)
else:
self.logger.info('Chart/values diff: %s', ch)

View File

@ -27,3 +27,16 @@ class ArmadaTimeoutException(ArmadaException):
def __init__(self, reason):
self._message = 'Armada timed out waiting on: %s' % (reason)
super(ArmadaTimeoutException, self).__init__(self._message)
class ProtectedReleaseException(ArmadaException):
'''
Exception that occurs when Armada encounters a FAILED release that is
designated `protected` in the Chart and `continue_processing` is False.
'''
def __init__(self, reason):
self._message = (
'Armada encountered protected release %s in FAILED status' % reason
)
super(ProtectedReleaseException, self).__init__(self._message)

View File

@ -20,7 +20,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from armada import const
from armada.exceptions.armada_exceptions import ArmadaTimeoutException
from armada.exceptions import armada_exceptions
from armada.exceptions import source_exceptions
from armada.exceptions import tiller_exceptions
from armada.exceptions import validate_exceptions
@ -99,13 +99,11 @@ class Armada(object):
tiller_host=tiller_host, tiller_port=tiller_port,
tiller_namespace=tiller_namespace, dry_run=dry_run)
self.documents = Override(
documents, overrides=set_ovr,
values=values).update_manifests()
documents, overrides=set_ovr, values=values).update_manifests()
self.k8s_wait_attempts = k8s_wait_attempts
self.k8s_wait_attempt_sleep = k8s_wait_attempt_sleep
self.manifest = Manifest(
self.documents,
target_manifest=target_manifest).get_manifest()
self.documents, target_manifest=target_manifest).get_manifest()
def find_release_chart(self, known_releases, release_name):
'''
@ -137,31 +135,16 @@ class Armada(object):
raise validate_exceptions.InvalidManifestException(
error_messages=details)
# TODO(MarshM): this is duplicated inside validate_armada_documents()
# L126, and should be unreachable code if it has errors
result, msg_list = validate.validate_armada_manifests(self.documents)
if not result:
raise validate_exceptions.InvalidArmadaObjectException(
details=','.join([m.get('message') for m in msg_list]))
# Purge known releases that have failed and are in the current yaml
manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {})
prefix = manifest_data.get(const.KEYWORD_PREFIX, '')
failed_releases = self.get_releases_by_status(const.STATUS_FAILED)
for release in failed_releases:
for group in manifest_data.get(const.KEYWORD_GROUPS, []):
for ch in group.get(const.KEYWORD_CHARTS, []):
ch_release_name = release_prefixer(
prefix, ch.get('chart', {}).get('release'))
if release[0] == ch_release_name:
LOG.info('Purging failed release %s '
'before deployment.', release[0])
self.tiller.uninstall_release(release[0])
# Clone the chart sources
#
# We only support a git source type right now, which can also
# handle git:// local paths as well
repos = {}
manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {})
for group in manifest_data.get(const.KEYWORD_GROUPS, []):
for ch in group.get(const.KEYWORD_CHARTS, []):
self.tag_cloned_repo(ch, repos)
@ -217,19 +200,25 @@ class Armada(object):
chart_name = chart.get('chart_name')
raise source_exceptions.ChartSourceException(ct_type, chart_name)
def get_releases_by_status(self, status):
def _get_releases_by_status(self):
'''
:params status - status string to filter releases on
Return a list of current releases with a specified status
Return a list of current releases with DEPLOYED or FAILED status
'''
filtered_releases = []
deployed_releases = []
failed_releases = []
known_releases = self.tiller.list_charts()
for release in known_releases:
if release[4] == status:
filtered_releases.append(release)
if release[4] == const.STATUS_DEPLOYED:
deployed_releases.append(release)
elif release[4] == const.STATUS_FAILED:
failed_releases.append(release)
else:
# tiller.list_charts() only looks at DEPLOYED/FAILED so
# this should be unreachable
LOG.debug('Ignoring release %s in status %s.',
release[0], release[4])
return filtered_releases
return deployed_releases, failed_releases
def sync(self):
'''
@ -238,16 +227,23 @@ class Armada(object):
if self.dry_run:
LOG.info('Armada is in DRY RUN mode, no changes being made.')
msg = {'install': [], 'upgrade': [], 'diff': []}
msg = {
'install': [],
'upgrade': [],
'diff': [],
'purge': [],
'protected': []
}
# TODO: (gardlt) we need to break up this func into
# a more cleaner format
self.pre_flight_ops()
# extract known charts on tiller right now
known_releases = self.tiller.list_charts()
deployed_releases, failed_releases = self._get_releases_by_status()
manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {})
prefix = manifest_data.get(const.KEYWORD_PREFIX, '')
prefix = manifest_data.get(const.KEYWORD_PREFIX)
for chartgroup in manifest_data.get(const.KEYWORD_GROUPS, []):
cg_name = chartgroup.get('name', '<missing name>')
@ -270,15 +266,39 @@ class Armada(object):
chart = chart_entry.get('chart', {})
namespace = chart.get('namespace')
release = chart.get('release')
release_name = release_prefixer(prefix, release)
values = chart.get('values', {})
pre_actions = {}
post_actions = {}
protected = chart.get('protected', {})
p_continue = protected.get('continue_processing', False)
# Check for existing FAILED release, and purge
if release_name in [rel[0] for rel in failed_releases]:
LOG.info('Purging FAILED release %s before deployment.',
release_name)
if protected:
if p_continue:
LOG.warn('Release %s is `protected`, '
'continue_processing=True. Operator must '
'handle FAILED release manually.',
release_name)
msg['protected'].append(release_name)
continue
else:
LOG.error('Release %s is `protected`, '
'continue_processing=False.',
release_name)
raise armada_exceptions.ProtectedReleaseException(
release_name)
else:
# Purge the release
self.tiller.uninstall_release(release_name)
msg['purge'].append(release_name)
wait_timeout = self.timeout
wait_labels = {}
release_name = release_prefixer(prefix, release)
# Retrieve appropriate timeout value
if wait_timeout <= 0:
# TODO(MarshM): chart's `data.timeout` should be deprecated
@ -308,15 +328,13 @@ class Armada(object):
chartbuilder = ChartBuilder(chart)
protoc_chart = chartbuilder.get_helm_chart()
deployed_releases = [x[0] for x in known_releases]
# Begin Chart timeout deadline
deadline = time.time() + wait_timeout
# TODO(mark-burnett): It may be more robust to directly call
# tiller status to decide whether to install/upgrade rather
# than checking for list membership.
if release_name in deployed_releases:
if release_name in [rel[0] for rel in deployed_releases]:
# indicate to the end user what path we are taking
LOG.info("Upgrading release %s in namespace %s",
@ -324,7 +342,7 @@ class Armada(object):
# extract the installed chart and installed values from the
# latest release so we can compare to the intended state
apply_chart, apply_values = self.find_release_chart(
known_releases, release_name)
deployed_releases, release_name)
upgrade = chart.get('upgrade', {})
disable_hooks = upgrade.get('no_hooks', False)
@ -424,7 +442,7 @@ class Armada(object):
reason = ('Timeout expired before testing sequenced '
'release %s' % release_name)
LOG.error(reason)
raise ArmadaTimeoutException(reason)
raise armada_exceptions.ArmadaTimeoutException(reason)
self._test_chart(release_name, timer)
# TODO(MarshM): handle test failure or timeout
@ -453,7 +471,7 @@ class Armada(object):
reason = ('Timeout expired waiting on namespace: %s, '
'labels: (%s)' % (ns, labels_dict))
LOG.error(reason)
raise ArmadaTimeoutException(reason)
raise armada_exceptions.ArmadaTimeoutException(reason)
self._wait_until_ready(
release_name=None, wait_labels=labels_dict,
@ -470,7 +488,8 @@ class Armada(object):
if self.enable_chart_cleanup:
self._chart_cleanup(
prefix,
self.manifest[const.KEYWORD_ARMADA][const.KEYWORD_GROUPS])
self.manifest[const.KEYWORD_ARMADA][const.KEYWORD_GROUPS],
msg)
LOG.info('Done applying manifest.')
return msg
@ -568,7 +587,7 @@ class Armada(object):
return result
def _chart_cleanup(self, prefix, charts):
def _chart_cleanup(self, prefix, charts, msg):
LOG.info('Processing chart cleanup to remove unspecified releases.')
valid_releases = []
@ -585,3 +604,4 @@ class Armada(object):
LOG.info('Purging release %s as part of chart cleanup.',
release)
self.tiller.uninstall_release(release)
msg['purge'].append(release)

View File

@ -594,6 +594,13 @@ class Tiller(object):
get_jobs = self.k8s.get_namespace_job(namespace, label_selector)
for jb in get_jobs.items:
jb_name = jb.metadata.name
if self.dry_run:
LOG.info('Skipping delete job during `dry-run`, would '
'have deleted job %s in namespace=%s.',
jb_name, namespace)
continue
LOG.info("Deleting job %s in namespace: %s",
jb_name, namespace)
self.k8s.delete_job_action(jb_name, namespace, timeout=timeout)
@ -604,22 +611,36 @@ class Tiller(object):
namespace, label_selector)
for jb in get_jobs.items:
jb_name = jb.metadata.name
LOG.info("Deleting cron job %s in namespace: %s",
jb_name, namespace)
if resource_type == 'job':
# TODO: Eventually disallow this, allowing initially since
# some existing clients were expecting this behavior.
LOG.warning("Deleting cron jobs via `type: job` is "
"deprecated, use `type: cronjob` instead")
LOG.warn("Deleting cronjobs via `type: job` is "
"deprecated, use `type: cronjob` instead")
if self.dry_run:
LOG.info('Skipping delete cronjob during `dry-run`, would '
'have deleted cronjob %s in namespace=%s.',
jb_name, namespace)
continue
LOG.info("Deleting cronjob %s in namespace: %s",
jb_name, namespace)
self.k8s.delete_cron_job_action(jb_name, namespace)
handled = True
if resource_type == 'pod':
release_pods = self.k8s.get_namespace_pod(
namespace, label_selector)
for pod in release_pods.items:
pod_name = pod.metadata.name
if self.dry_run:
LOG.info('Skipping delete pod during `dry-run`, would '
'have deleted pod %s in namespace=%s.',
pod_name, namespace)
continue
LOG.info("Deleting pod %s in namespace: %s",
pod_name, namespace)
self.k8s.delete_namespace_pod(pod_name, namespace)

View File

@ -52,6 +52,12 @@ data:
type: array
items:
type: string
protected:
type: object
properties:
continue_processing:
type: boolean
additionalProperties: false
test:
type: boolean
timeout:

View File

@ -19,7 +19,7 @@ from armada import const
from armada.handlers import armada
from armada.tests.unit import base
from armada.utils.release import release_prefixer
from armada.exceptions.armada_exceptions import ProtectedReleaseException
TEST_YAML = """
---
@ -42,6 +42,31 @@ data:
chart_group:
- example-chart-1
- example-chart-2
- example-chart-3
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: example-chart-3
data:
chart_name: test_chart_3
release: test_chart_3
namespace: test
values: {}
source:
type: local
location: /tmp/dummy/armada
subpath: chart_3
dependencies: []
protected:
continue_processing: false
wait:
timeout: 10
upgrade:
no_hooks: false
options:
force: true
recreate_pods: true
---
schema: armada/Chart/v1
metadata:
@ -57,6 +82,8 @@ data:
location: /tmp/dummy/armada
subpath: chart_2
dependencies: []
protected:
continue_processing: true
wait:
timeout: 10
upgrade:
@ -85,7 +112,8 @@ data:
"""
CHART_SOURCES = [('git://github.com/dummy/armada', 'chart_1'),
('/tmp/dummy/armada', 'chart_2')]
('/tmp/dummy/armada', 'chart_2'),
('/tmp/dummy/armada', 'chart_3')]
class ArmadaHandlerTestCase(base.ArmadaTestCase):
@ -124,6 +152,9 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
'dependencies': [],
'chart_name': 'test_chart_2',
'namespace': 'test',
'protected': {
'continue_processing': True
},
'release': 'test_chart_2',
'source': {
'location': '/tmp/dummy/armada',
@ -143,6 +174,34 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
}
}
}
},
{
'chart': {
'dependencies': [],
'chart_name': 'test_chart_3',
'namespace': 'test',
'protected': {
'continue_processing': False
},
'release': 'test_chart_3',
'source': {
'location': '/tmp/dummy/armada',
'subpath': 'chart_3',
'type': 'local'
},
'source_dir': CHART_SOURCES[2],
'values': {},
'wait': {
'timeout': 10
},
'upgrade': {
'no_hooks': False,
'options': {
'force': True,
'recreate_pods': True
}
}
}
}
],
'description': 'this is a test',
@ -180,43 +239,6 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
'git://github.com/dummy/armada', 'master', auth_method=None,
proxy_server=None)
@mock.patch.object(armada, 'source')
@mock.patch('armada.handlers.armada.Tiller')
def test_pre_flight_ops_with_failed_releases(self, mock_tiller,
mock_source):
"""Test pre-flight functions uninstalls failed Tiller releases."""
yaml_documents = list(yaml.safe_load_all(TEST_YAML))
armada_obj = armada.Armada(yaml_documents)
# Mock methods called by `pre_flight_ops()`.
m_tiller = mock_tiller.return_value
m_tiller.tiller_status.return_value = True
mock_source.git_clone.return_value = CHART_SOURCES[0][0]
# Only the first two releases failed and should be uninstalled. Armada
# looks at index [4] for each release to determine the status.
m_tiller.list_charts.return_value = [
['armada-test_chart_1', None, None, None, const.STATUS_FAILED],
['armada-test_chart_2', None, None, None, const.STATUS_FAILED],
[None, None, None, None, const.STATUS_DEPLOYED]
]
self._test_pre_flight_ops(armada_obj)
# Assert both failed releases were uninstalled.
m_tiller.uninstall_release.assert_has_calls([
mock.call('armada-test_chart_1'),
mock.call('armada-test_chart_2')
])
mock_tiller.assert_called_once_with(tiller_host=None,
tiller_namespace='kube-system',
tiller_port=44134,
dry_run=False)
mock_source.git_clone.assert_called_once_with(
'git://github.com/dummy/armada', 'master', auth_method=None,
proxy_server=None)
@mock.patch.object(armada, 'source')
@mock.patch('armada.handlers.armada.Tiller')
def test_post_flight_ops(self, mock_tiller, mock_source):
@ -267,6 +289,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
expected_install_release_calls = []
expected_update_release_calls = []
expected_uninstall_release_calls = []
for c in charts:
chart = c['chart']
@ -291,28 +314,63 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
)
)
else:
upgrade = chart.get('upgrade', {})
disable_hooks = upgrade.get('no_hooks', False)
force = upgrade.get('force', False)
recreate_pods = upgrade.get('recreate_pods', False)
target_release = None
for known_release in known_releases:
if known_release[0] == release_name:
target_release = known_release
break
if target_release:
status = target_release[4]
if status == const.STATUS_FAILED:
protected = chart.get('protected', {})
if not protected:
expected_uninstall_release_calls.append(
mock.call(release_name))
expected_install_release_calls.append(
mock.call(
mock_chartbuilder().get_helm_chart(),
"{}-{}".format(
armada_obj.manifest['armada'][
'release_prefix'],
chart['release']),
chart['namespace'],
values=yaml.safe_dump(chart['values']),
wait=this_chart_should_wait,
timeout=chart['wait']['timeout']
)
)
else:
p_continue = protected.get(
'continue_processing', False)
if p_continue:
continue
else:
break
expected_update_release_calls.append(
mock.call(
mock_chartbuilder().get_helm_chart(),
"{}-{}".format(armada_obj.manifest['armada'][
'release_prefix'],
chart['release']),
chart['namespace'],
pre_actions={},
post_actions={},
disable_hooks=disable_hooks,
force=force,
recreate_pods=recreate_pods,
values=yaml.safe_dump(chart['values']),
wait=this_chart_should_wait,
timeout=chart['wait']['timeout']
)
)
if status == const.STATUS_DEPLOYED:
upgrade = chart.get('upgrade', {})
disable_hooks = upgrade.get('no_hooks', False)
force = upgrade.get('force', False)
recreate_pods = upgrade.get('recreate_pods', False)
expected_update_release_calls.append(
mock.call(
mock_chartbuilder().get_helm_chart(),
"{}-{}".format(
armada_obj.manifest['armada'][
'release_prefix'],
chart['release']),
chart['namespace'],
pre_actions={},
post_actions={},
disable_hooks=disable_hooks,
force=force,
recreate_pods=recreate_pods,
values=yaml.safe_dump(chart['values']),
wait=this_chart_should_wait,
timeout=chart['wait']['timeout']
)
)
# Verify that at least 1 release is either installed or updated.
self.assertTrue(
@ -364,6 +422,42 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
]
self._test_sync(known_releases)
def test_armada_sync_with_unprotected_releases(self):
c1 = 'armada-test_chart_1'
known_releases = [
[c1, None, self._get_chart_by_name(c1), None,
const.STATUS_FAILED]
]
self._test_sync(known_releases)
def test_armada_sync_with_protected_releases_continue(self):
c1 = 'armada-test_chart_1'
c2 = 'armada-test_chart_2'
known_releases = [
[c2, None, self._get_chart_by_name(c2), None,
const.STATUS_FAILED],
[c1, None, self._get_chart_by_name(c1), None,
const.STATUS_FAILED]
]
self._test_sync(known_releases)
def test_armada_sync_with_protected_releases_halt(self):
c3 = 'armada-test_chart_3'
known_releases = [
[c3, None, self._get_chart_by_name(c3), None,
const.STATUS_FAILED]
]
def _test_method():
self._test_sync(known_releases)
self.assertRaises(
ProtectedReleaseException,
_test_method)
@mock.patch.object(armada.Armada, 'post_flight_ops')
@mock.patch.object(armada.Armada, 'pre_flight_ops')
@mock.patch('armada.handlers.armada.ChartBuilder')

View File

@ -25,3 +25,8 @@
:members:
:show-inheritance:
:undoc-members:
* - ProtectedReleaseException
- .. autoexception:: armada.exceptions.armada_exceptions.ProtectedReleaseException
:members:
:show-inheritance:
:undoc-members:

View File

@ -95,6 +95,9 @@ Chart
+-----------------+----------+---------------------------------------------------------------------------------------+
| wait | object | contains wait information such as (timeout, labels) |
+-----------------+----------+---------------------------------------------------------------------------------------+
| protected | object | do not delete FAILED releases when encountered from previous run (provide the |
| | | 'continue_processing' bool to continue or halt execution (default: halt)) |
+-----------------+----------+---------------------------------------------------------------------------------------+
| test | bool | run pre-defined helm tests helm in a chart |
+-----------------+----------+---------------------------------------------------------------------------------------+
| install | object | install the chart into your Kubernetes cluster |
@ -176,6 +179,8 @@ Chart Example
namespace: default
wait:
timeout: 100
protected:
continue_processing: false
install:
no_hooks: false
upgrade:
@ -237,7 +242,7 @@ Source Example
wait:
timeout: 100
labels:
component: blog
component: blog
install:
no_hooks: false
upgrade: