[v2 docs] Overhaul wait API

See the v1-v2 migration guide updates in this commit for details.

Change-Id: I6a8a69f8392e8065eda039597278c7dfe593a4fd
Sean Eagan 2019-02-12 12:51:16 -06:00
parent b881e176f5
commit 66498250b9
7 changed files with 463 additions and 167 deletions

View File

@@ -63,7 +63,7 @@ class ChartDeploy(object):
         chart_wait = ChartWait(
             self.tiller.k8s,
             release_name,
-            chart,
+            ch,
             namespace,
             k8s_wait_attempts=self.k8s_wait_attempts,
             k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,

View File

@@ -14,23 +14,27 @@
 from abc import ABC, abstractmethod
 import collections
+import copy
 import math
 import re
 import time

+from kubernetes import watch
 from oslo_log import log as logging

 from armada import const
-from armada.utils.helm import is_test_pod
-from armada.utils.release import label_selectors
 from armada.exceptions import k8s_exceptions
 from armada.exceptions import manifest_exceptions
 from armada.exceptions import armada_exceptions
-from kubernetes import watch
+from armada.handlers.schema import get_schema_info
+from armada.utils.helm import is_test_pod
+from armada.utils.release import label_selectors

 LOG = logging.getLogger(__name__)

 ROLLING_UPDATE_STRATEGY_TYPE = 'RollingUpdate'
+ASYNC_UPDATE_NOT_ALLOWED_MSG = 'Async update not allowed: '


 def get_wait_labels(chart):
@@ -46,36 +50,52 @@ class ChartWait():
         self.k8s = k8s
         self.release_name = release_name
         self.chart = chart
-        self.wait_config = chart.get('wait', {})
+        chart_data = self.chart[const.KEYWORD_DATA]
+        self.chart_data = chart_data
+        self.wait_config = self.chart_data.get('wait', {})
         self.namespace = namespace
         self.k8s_wait_attempts = max(k8s_wait_attempts, 1)
         self.k8s_wait_attempt_sleep = max(k8s_wait_attempt_sleep, 1)

-        resources = self.wait_config.get('resources')
-        labels = get_wait_labels(self.chart)
-
-        if resources is not None:
-            waits = []
-            for resource_config in resources:
-                # Initialize labels
-                resource_config.setdefault('labels', {})
-                # Add base labels
-                resource_config['labels'].update(labels)
-                waits.append(self.get_resource_wait(resource_config))
+        schema_info = get_schema_info(self.chart['schema'])
+
+        resources = self.wait_config.get('resources')
+        if isinstance(resources, list):
+            # Explicit resource config list provided.
+            resources_list = resources
         else:
-            waits = [
-                JobWait('job', self, labels, skip_if_none_found=True),
-                PodWait('pod', self, labels)
-            ]
-        self.waits = waits
+            # TODO: Remove when v1 doc support is removed.
+            if schema_info.version < 2:
+                resources_list = [{
+                    'type': 'job',
+                    'required': False
+                }, {
+                    'type': 'pod'
+                }]
+            else:
+                resources_list = self.get_resources_list(resources)
+
+        chart_labels = get_wait_labels(self.chart_data)
+        for resource_config in resources_list:
+            # Use chart labels as base labels for each config.
+            labels = dict(chart_labels)
+            resource_labels = resource_config.get('labels', {})
+            # Merge in any resource-specific labels.
+            if resource_labels:
+                labels.update(resource_labels)
+            resource_config['labels'] = labels
+
+        LOG.debug('Resolved `wait.resources` list: %s', resources_list)
+
+        self.waits = [self.get_resource_wait(conf) for conf in resources_list]

         # Calculate timeout
         wait_timeout = timeout
         if wait_timeout is None:
             wait_timeout = self.wait_config.get('timeout')

-        # TODO(MarshM): Deprecated, remove `timeout` key.
-        deprecated_timeout = self.chart.get('timeout')
+        # TODO: Remove when v1 doc support is removed.
+        deprecated_timeout = self.chart_data.get('timeout')
         if deprecated_timeout is not None:
             LOG.warn('The `timeout` key is deprecated and support '
                      'for this will be removed soon. Use '
@@ -90,12 +110,19 @@ class ChartWait():
         self.timeout = wait_timeout

+        # Determine whether to enable native wait.
+        native = self.wait_config.get('native', {})
+
+        # TODO: Remove when v1 doc support is removed.
+        default_native = schema_info.version < 2
+
+        self.native_enabled = native.get('enabled', default_native)
+
     def get_timeout(self):
         return self.timeout

     def is_native_enabled(self):
-        native_wait = self.wait_config.get('native', {})
-        return native_wait.get('enabled', True)
+        return self.native_enabled

     def wait(self, timeout):
         deadline = time.time() + timeout
@@ -104,6 +131,54 @@ class ChartWait():
             wait.wait(timeout=timeout)
             timeout = int(round(deadline - time.time()))

+    def get_resources_list(self, resources):
+        # Use default resource configs, with any provided resource type
+        # overrides merged in.
+
+        # By default, wait on all supported resource types.
+        resource_order = [
+            # Jobs may perform initialization so add them first.
+            'job',
+            'daemonset',
+            'statefulset',
+            'deployment',
+            'pod'
+        ]
+        base_resource_config = {
+            # By default, skip if none found so we don't fail on charts
+            # which don't contain resources of a given type.
+            'required': False
+        }
+        # Create a map of resource types to default configs.
+        resource_configs = collections.OrderedDict(
+            [(type, base_resource_config) for type in resource_order])
+
+        # Handle any overrides and/or removals of resource type configs.
+        if resources:
+            for type, v in resources.items():
+                if v is False:
+                    # Remove this type.
+                    resource_configs.pop(type)
+                else:
+                    # Override config for this type.
+                    resource_configs[type] = v
+
+        resources_list = []
+        # Convert the resource type map to a list of fully baked resource
+        # configs with type included.
+        for type, config in resource_configs.items():
+            if isinstance(config, list):
+                configs = config
+            else:
+                configs = [config]
+
+            for conf in configs:
+                resource_config = copy.deepcopy(conf)
+                resource_config['type'] = type
+                resources_list.append(resource_config)
+
+        return resources_list
+
     def get_resource_wait(self, resource_config):

         kwargs = dict(resource_config)
@@ -138,12 +213,12 @@ class ResourceWait(ABC):
                  chart_wait,
                  labels,
                  get_resources,
-                 skip_if_none_found=False):
+                 required=True):
         self.resource_type = resource_type
         self.chart_wait = chart_wait
         self.label_selector = label_selectors(labels)
         self.get_resources = get_resources
-        self.skip_if_none_found = skip_if_none_found
+        self.required = required

     @abstractmethod
     def is_resource_ready(self, resource):
@@ -174,19 +249,19 @@ class ResourceWait(ABC):
     def handle_resource(self, resource):
         resource_name = resource.metadata.name

+        resource_desc = '{} {}'.format(self.resource_type, resource_name)
+
         try:
             message, resource_ready = self.is_resource_ready(resource)

             if resource_ready:
-                LOG.debug('Resource %s is ready!', resource_name)
+                LOG.debug('%s is ready!', resource_desc)
             else:
-                LOG.debug('Resource %s not ready: %s', resource_name, message)
+                LOG.debug('%s not ready: %s', resource_desc, message)

             return resource_ready
         except armada_exceptions.WaitException as e:
-            LOG.warn('Resource %s unlikely to become ready: %s', resource_name,
-                     e)
+            LOG.warn('%s unlikely to become ready: %s', resource_desc, e)
             return False

     def wait(self, timeout):
@@ -194,12 +269,13 @@ class ResourceWait(ABC):
         :param timeout: time before disconnecting ``Watch`` stream
         '''

+        min_ready_msg = ', min_ready={}'.format(
+            self.min_ready.source) if isinstance(self, ControllerWait) else ''
         LOG.info(
-            "Waiting for resource type=%s, namespace=%s labels=%s for %ss "
-            "(k8s wait %s times, sleep %ss)", self.resource_type,
-            self.chart_wait.namespace, self.label_selector, timeout,
-            self.chart_wait.k8s_wait_attempts,
-            self.chart_wait.k8s_wait_attempt_sleep)
+            "Waiting for resource type=%s, namespace=%s labels=%s "
+            "required=%s%s for %ss", self.resource_type,
+            self.chart_wait.namespace, self.label_selector, self.required,
+            min_ready_msg, timeout)
         if not self.label_selector:
             LOG.warn('"label_selector" not specified, waiting with no labels '
                      'may cause unintended consequences.')
@@ -207,60 +283,73 @@ class ResourceWait(ABC):
         # Track the overall deadline for timing out during waits
         deadline = time.time() + timeout

-        # NOTE(mark-burnett): Attempt to wait multiple times without
-        # modification, in case new resources appear after our watch exits.
-
-        successes = 0
-        while True:
-            deadline_remaining = int(round(deadline - time.time()))
-            if deadline_remaining <= 0:
-                error = (
-                    "Timed out waiting for resource type={}, namespace={}, "
-                    "labels={}".format(self.resource_type,
-                                       self.chart_wait.namespace,
-                                       self.label_selector))
-                LOG.error(error)
-                raise k8s_exceptions.KubernetesWatchTimeoutException(error)
-
-            timed_out, modified, unready, found_resources = (
-                self._watch_resource_completions(timeout=deadline_remaining))
-
-            if (not found_resources) and self.skip_if_none_found:
-                return
-
-            if timed_out:
-                if not found_resources:
-                    details = (
-                        'None found! Are `wait.labels` correct? Does '
-                        '`wait.resources` need to exclude `type: {}`?'.format(
-                            self.resource_type))
-                else:
-                    details = ('These {}s were not ready={}'.format(
-                        self.resource_type, sorted(unready)))
-                error = (
-                    'Timed out waiting for {}s (namespace={}, labels=({})). {}'
-                    .format(self.resource_type, self.chart_wait.namespace,
-                            self.label_selector, details))
-                LOG.error(error)
-                raise k8s_exceptions.KubernetesWatchTimeoutException(error)
-
-            if modified:
-                successes = 0
-                LOG.debug('Found modified resources: %s', sorted(modified))
-            else:
-                successes += 1
-                LOG.debug('Found no modified resources.')
-
-            if successes >= self.chart_wait.k8s_wait_attempts:
-                break
-
-            LOG.debug(
-                'Continuing to wait: %s consecutive attempts without '
-                'modified resources of %s required.', successes,
-                self.chart_wait.k8s_wait_attempts)
-            time.sleep(self.chart_wait.k8s_wait_attempt_sleep)
-
-        return True
+        schema_info = get_schema_info(self.chart_wait.chart['schema'])
+        # TODO: Remove when v1 doc support is removed.
+        if schema_info.version < 2:
+            # NOTE(mark-burnett): Attempt to wait multiple times without
+            # modification, in case new resources appear after our watch exits.
+            successes = 0
+            while True:
+                modified = self._wait(deadline)
+                if modified is None:
+                    break
+                if modified:
+                    successes = 0
+                    LOG.debug('Found modified resources: %s', sorted(modified))
+                else:
+                    successes += 1
+                    LOG.debug('Found no modified resources.')

+                if successes >= self.chart_wait.k8s_wait_attempts:
+                    return
+
+                LOG.debug(
+                    'Continuing to wait: %s consecutive attempts without '
+                    'modified resources of %s required.', successes,
+                    self.chart_wait.k8s_wait_attempts)
+                time.sleep(self.chart_wait.k8s_wait_attempt_sleep)
+        else:
+            self._wait(deadline)
+
+    def _wait(self, deadline):
+        '''
+        Waits for resources to become ready.
+
+        Returns whether resources were modified, or `None` if that is to be
+        ignored.
+        '''
+
+        deadline_remaining = int(round(deadline - time.time()))
+        if deadline_remaining <= 0:
+            error = ("Timed out waiting for resource type={}, namespace={}, "
+                     "labels={}".format(self.resource_type,
+                                        self.chart_wait.namespace,
+                                        self.label_selector))
+            LOG.error(error)
+            raise k8s_exceptions.KubernetesWatchTimeoutException(error)
+
+        timed_out, modified, unready, found_resources = (
+            self._watch_resource_completions(timeout=deadline_remaining))
+
+        if (not found_resources) and not self.required:
+            return None
+
+        if timed_out:
+            if not found_resources:
+                details = (
+                    'None found! Are `wait.labels` correct? Does '
+                    '`wait.resources` need to exclude `type: {}`?'.format(
+                        self.resource_type))
+            else:
+                details = ('These {}s were not ready={}'.format(
+                    self.resource_type, sorted(unready)))
+            error = (
+                'Timed out waiting for {}s (namespace={}, labels=({})). {}'.
+                format(self.resource_type, self.chart_wait.namespace,
+                       self.label_selector, details))
+            LOG.error(error)
+            raise k8s_exceptions.KubernetesWatchTimeoutException(error)
+
+        return modified

     def _watch_resource_completions(self, timeout):
         '''
@@ -288,8 +377,8 @@ class ResourceWait(ABC):
             if self.include_resource(resource):
                 ready[resource.metadata.name] = self.handle_resource(resource)
         if not resource_list.items:
-            if self.skip_if_none_found:
-                msg = 'Skipping wait, no %s resources found.'
+            if not self.required:
+                msg = 'Skipping non-required wait, no %s resources found.'
                 LOG.debug(msg, self.resource_type)
                 return (False, modified, [], found_resources)
             else:
@@ -370,11 +459,19 @@ class PodWait(ResourceWait):
         if is_test_pod(pod):
             return 'helm test pod'

-        # Exclude job pods
-        # TODO: Once controller-based waits are enabled by default, ignore
-        # controller-owned pods as well.
-        if has_owner(pod, 'Job'):
-            return 'generated by job (wait on that instead if not already)'
+        schema_info = get_schema_info(self.chart_wait.chart['schema'])
+        # TODO: Remove when v1 doc support is removed.
+        if schema_info.version < 2:
+            # Exclude job pods
+            if has_owner(pod, 'Job'):
+                return 'owned by job'
+        else:
+            # Exclude all pods with an owner (only include raw pods)
+            # TODO: In helm 3, all resources will likely have the release CR as
+            # an owner, so this will need to be updated to not exclude pods
+            # directly owned by the release.
+            if has_owner(pod):
+                return 'owned by another resource'

         return None
@@ -409,7 +506,7 @@ class JobWait(ResourceWait):
         # Exclude cronjob jobs
         if has_owner(job, 'CronJob'):
-            return 'generated by cronjob (not part of release)'
+            return 'owned by cronjob (not part of release)'

         return None
@@ -493,10 +590,13 @@ class DeploymentWait(ControllerWait):
         name = deployment.metadata.name
         spec = deployment.spec
         status = deployment.status

         gen = deployment.metadata.generation or 0
         observed_gen = status.observed_generation or 0
         if gen <= observed_gen:
+            # TODO: Don't fail for lack of progress if `min_ready` is met.
+            # TODO: Consider continuing after `min_ready` is met, so long as
+            # progress is being made.
             cond = self._get_resource_condition(status.conditions,
                                                 'Progressing')
             if cond and (cond.reason or '') == 'ProgressDeadlineExceeded':
@@ -531,30 +631,42 @@ class DeploymentWait(ControllerWait):

 class DaemonSetWait(ControllerWait):

-    def __init__(self, resource_type, chart_wait, labels, **kwargs):
+    def __init__(self,
+                 resource_type,
+                 chart_wait,
+                 labels,
+                 allow_async_updates=False,
+                 **kwargs):
         super(DaemonSetWait, self).__init__(
             resource_type, chart_wait, labels,
             chart_wait.k8s.apps_v1_api.list_namespaced_daemon_set, **kwargs)

+        self.allow_async_updates = allow_async_updates
+
     def is_resource_ready(self, resource):
         daemon = resource
         name = daemon.metadata.name
         spec = daemon.spec
         status = daemon.status

-        if spec.update_strategy.type != ROLLING_UPDATE_STRATEGY_TYPE:
-            msg = ("Assuming non-readiness for strategy type {}, can only "
-                   "determine for {}")
-            raise armada_exceptions.WaitException(
-                msg.format(spec.update_strategy.type,
-                           ROLLING_UPDATE_STRATEGY_TYPE))
-
         gen = daemon.metadata.generation or 0
         observed_gen = status.observed_generation or 0
-        updated_number_scheduled = status.updated_number_scheduled or 0
-        desired_number_scheduled = status.desired_number_scheduled or 0
-        number_available = status.number_available or 0
+
+        if not self.allow_async_updates:
+            is_update = observed_gen > 1
+            if is_update:
+                strategy = spec.update_strategy.type or ''
+                is_rolling = strategy == ROLLING_UPDATE_STRATEGY_TYPE
+                if not is_rolling:
+                    msg = "{}: update strategy type = {}"
+                    raise armada_exceptions.WaitException(
+                        msg.format(ASYNC_UPDATE_NOT_ALLOWED_MSG, strategy))
+
         if gen <= observed_gen:
+            updated_number_scheduled = status.updated_number_scheduled or 0
+            desired_number_scheduled = status.desired_number_scheduled or 0
+            number_available = status.number_available or 0
             if (updated_number_scheduled < desired_number_scheduled):
                 msg = ("Waiting for daemon set {} rollout to finish: {} out "
                        "of {} new pods have been updated...")
@@ -578,49 +690,58 @@ class DaemonSetWait(ControllerWait):

 class StatefulSetWait(ControllerWait):

-    def __init__(self, resource_type, chart_wait, labels, **kwargs):
+    def __init__(self,
+                 resource_type,
+                 chart_wait,
+                 labels,
+                 allow_async_updates=False,
+                 **kwargs):
         super(StatefulSetWait, self).__init__(
             resource_type, chart_wait, labels,
             chart_wait.k8s.apps_v1_api.list_namespaced_stateful_set, **kwargs)

+        self.allow_async_updates = allow_async_updates
+
     def is_resource_ready(self, resource):
         sts = resource
         name = sts.metadata.name
         spec = sts.spec
         status = sts.status

-        update_strategy_type = spec.update_strategy.type or ''
-        if update_strategy_type != ROLLING_UPDATE_STRATEGY_TYPE:
-            msg = ("Assuming non-readiness for strategy type {}, can only "
-                   "determine for {}")
-            raise armada_exceptions.WaitException(
-                msg.format(update_strategy_type, ROLLING_UPDATE_STRATEGY_TYPE))
-
         gen = sts.metadata.generation or 0
         observed_gen = status.observed_generation or 0
-        if (observed_gen == 0 or gen > observed_gen):
-            msg = "Waiting for statefulset spec update to be observed..."
-            return (msg, False)

         replicas = spec.replicas or 0
         ready_replicas = status.ready_replicas or 0
         updated_replicas = status.updated_replicas or 0
         current_replicas = status.current_replicas or 0

+        if not self.allow_async_updates:
+            is_update = observed_gen > 1
+            if is_update:
+                strategy = spec.update_strategy.type or ''
+                is_rolling = strategy == ROLLING_UPDATE_STRATEGY_TYPE
+                if not is_rolling:
+                    msg = "{}: update strategy type = {}"
+                    raise armada_exceptions.WaitException(
+                        msg.format(ASYNC_UPDATE_NOT_ALLOWED_MSG, strategy))
+
+                if (is_rolling and replicas and
+                        spec.update_strategy.rolling_update.partition):
+                    msg = "{}: partitioned rollout"
+                    raise armada_exceptions.WaitException(
+                        msg.format(ASYNC_UPDATE_NOT_ALLOWED_MSG))
+
+        if (observed_gen == 0 or gen > observed_gen):
+            msg = "Waiting for statefulset spec update to be observed..."
+            return (msg, False)
+
         if replicas and not self._is_min_ready(ready_replicas, replicas):
             msg = ("Waiting for statefulset {} rollout to finish: {} of {} "
                    "pods are ready, with min_ready={}")
             return (msg.format(name, ready_replicas, replicas,
                                self.min_ready.source), False)

-        if (update_strategy_type == ROLLING_UPDATE_STRATEGY_TYPE and
-                spec.update_strategy.rolling_update):
-            if replicas and spec.update_strategy.rolling_update.partition:
-                msg = ("Waiting on partitioned rollout not supported, "
-                       "assuming non-readiness of statefulset {}")
-                return (msg.format(name), False)
-
         update_revision = status.update_revision or 0
         current_revision = status.current_revision or 0

View File

@@ -36,6 +36,16 @@ data:
         required:
           - type
         additionalProperties: false
+    wait_resource_type_config:
+      properties:
+        labels:
+          $ref: '#/definitions/labels'
+        min_ready:
+          anyOf:
+            - type: integer
+            - type: string
+        required:
+          type: boolean
   type: object
   properties:
     release:
@@ -76,20 +86,22 @@ data:
       timeout:
         type: integer
       resources:
-        type: array
-        items:
-          properties:
-            type:
-              type: string
-            labels:
-              $ref: '#/definitions/labels'
-            min_ready:
-              anyOf:
-                - type: integer
-                - type: string
-          required:
-            - type
-          additionalProperties: false
+        anyOf:
+          - additionalProperties:
+              anyOf:
+                - $ref: '#/definitions/wait_resource_type_config'
+                - type: array
+                  items:
+                    $ref: '#/definitions/wait_resource_type_config'
+          - type: array
+            items:
+              allOf:
+                - $ref: '#/definitions/wait_resource_type_config'
+                - properties:
+                    type:
+                      type: string
+                  required:
+                    - type
       labels:
         $ref: "#/definitions/labels"
       # Config for helm's native `--wait` param.
@@ -126,8 +138,6 @@ data:
     upgrade:
       type: object
       properties:
-        no_hooks:
-          type: boolean
         pre:
           type: object
           additionalProperties: false
@@ -141,6 +151,8 @@ data:
             type: boolean
         recreate_pods:
           type: boolean
+        no_hooks:
+          type: boolean
       additionalProperties: false
   additionalProperties: false
 required:

View File

@@ -140,7 +140,7 @@ data:
         wait:
           timeout: 10
           native:
-            enabled: false
+            enabled: true
         test:
           enabled: true
 """
@@ -195,7 +195,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
                         'wait': {
                             'timeout': 10,
                             'native': {
-                                'enabled': False
+                                'enabled': True
                             }
                         },
                         'test': {

View File

@@ -24,7 +24,14 @@ test_chart = {'wait': {'timeout': 10, 'native': {'enabled': False}}}

 class ChartWaitTestCase(base.ArmadaTestCase):

-    def get_unit(self, chart, timeout=None):
+    def get_unit(self, chart_data, timeout=None, version=2):
+        chart = {
+            'schema': 'armada/Chart/v{}'.format(str(version)),
+            'metadata': {
+                'name': 'test'
+            },
+            const.KEYWORD_DATA: chart_data
+        }
         return wait.ChartWait(
             k8s=mock.MagicMock(),
             release_name='test-test',
@@ -44,7 +51,7 @@ class ChartWaitTestCase(base.ArmadaTestCase):

     def test_get_timeout_override(self):
         unit = self.get_unit(
-            timeout=20, chart={
+            timeout=20, chart_data={
                 'timeout': 5,
                 'wait': {
                     'timeout': 10
@@ -57,9 +64,9 @@ class ChartWaitTestCase(base.ArmadaTestCase):
         unit = self.get_unit({'timeout': 5})
         self.assertEquals(unit.get_timeout(), 5)

-    def test_is_native_enabled_default_true(self):
+    def test_is_native_enabled_default_false(self):
         unit = self.get_unit({})
-        self.assertEquals(unit.is_native_enabled(), True)
+        self.assertEquals(unit.is_native_enabled(), False)

     def test_is_native_enabled_true(self):
         unit = self.get_unit({'wait': {'native': {'enabled': True}}})
@@ -188,9 +195,11 @@ class ChartWaitTestCase(base.ArmadaTestCase):

 class PodWaitTestCase(base.ArmadaTestCase):

-    def get_unit(self, labels):
+    def get_unit(self, labels, version=2):
         return wait.PodWait(
-            resource_type='pod', chart_wait=mock.MagicMock(), labels=labels)
+            resource_type='pod',
+            chart_wait=ChartWaitTestCase.get_unit(None, {}, version=version),
+            labels=labels)

     def test_include_resource(self):
@@ -223,7 +232,7 @@ class PodWaitTestCase(base.ArmadaTestCase):
             mock_resource(owner_references=[mock.Mock(kind='NotAJob')])
         ]

-        unit = self.get_unit({})
+        unit = self.get_unit({}, version=1)

         # Validate test pods excluded
         for pod in test_pods:

View File

@@ -53,6 +53,68 @@ Chart
 | ``source.subpath``             |                                                            |
 | now optional                   |                                                            |
 +--------------------------------+------------------------------------------------------------+
+| ``wait`` improvements          | See `Wait Improvements`_.                                  |
++--------------------------------+------------------------------------------------------------+
+
+Wait Improvements
+^^^^^^^^^^^^^^^^^
+
+The :ref:`v2 wait API <wait_v2>` includes the following changes.
+
+Breaking changes
+****************
+
+1. ``wait.resources`` now defaults to all supported resource ``type`` s,
+   currently ``job``, ``daemonset``, ``statefulset``, ``deployment``, and
+   ``pod``, with ``required`` (a new option) set to ``false``. The previous
+   default was the equivalent of pods with ``required=true`` and jobs with
+   ``required=false``.
+
+2. ``type: pod`` waits now exclude pods owned by other resources, such as
+   controllers, as one should instead wait directly on the controller itself,
+   which, per item 1, is now the default.
+
+3. Waits are no longer retried due to resources having been modified. This was
+   mildly useful before as an indicator of whether all targeted resources were
+   accounted for, but with items 1 and 2 above, we now track the top-level
+   resources directly included in the release, rather than generated resources
+   such as controller-owned pods, so there is no need to wait for additional
+   resources to come into existence.
+
+4. ``wait.native.enabled`` is now disabled by default. With the above changes,
+   it is no longer useful as a backup mechanism, and having both waits enabled
+   makes it ambiguous which of them failed in a given case. More importantly,
+   it must be disabled in order to use the ``min_ready`` functionality,
+   otherwise tiller waits for 100% anyway; disabling it by default prevents
+   accidentally leaving it enabled in that case. Also, when the tiller native
+   wait timed out, tiller marked the release FAILED, which caused it to be
+   purged and re-installed (unless protected), even though the wait criteria
+   may have eventually succeeded, something Armada already validates on retry.
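
(Editorial sketch, not part of this commit's diff: a v2 chart that wants to
stay close to the old defaults described in items 1 and 4 above could spell
them out explicitly. The values below are assumptions for illustration only.)

.. code-block:: yaml

  wait:
    # Wait only on pods and jobs, as the v1 defaults effectively did.
    resources:
      - type: pod
        required: true
      - type: job
        required: false
    # Re-enable tiller's native wait, which is now off by default.
    native:
      enabled: true
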
+New features
+************
+
+Per-resource-type overrides
++++++++++++++++++++++++++++
+
+``wait.resources`` can now be a dict, mapping individual resource types to
+wait configurations (or lists thereof). Resource types that are not mentioned
+keep the default configuration, and a given resource type can be disabled by
+mapping it to ``false``.
+
+The ability to provide the entire explicit list for ``wait.resources`` remains
+in place as well.
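
(Editorial sketch, not part of this commit: one possible use of the dict form,
disabling job waits and overriding only the daemonset config while the other
resource types keep their defaults. The label values are made up.)

.. code-block:: yaml

  wait:
    resources:
      # Disable waiting on jobs entirely.
      job: false
      # Override only the daemonset config; deployment, statefulset and pod
      # keep the default (required: false) configuration.
      daemonset:
        labels:
          component: critical
        min_ready: 80%
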
+required
+++++++++
+
+A ``required`` field is also exposed for items/values in ``wait.resources``.
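
(Editorial sketch, not from the commit: requiring that at least one job
matching the wait labels is found, rather than skipping when none exist.)

.. code-block:: yaml

  wait:
    resources:
      job:
        required: true
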
+allow_async_updates
++++++++++++++++++++
+
+An ``allow_async_updates`` field is added to ``daemonset`` and ``statefulset``
+type items/values in ``wait.resources``.
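
(Editorial sketch, not part of this commit: opting in to waiting on a
statefulset whose update strategy is asynchronous, e.g. OnDelete or a
partitioned RollingUpdate, which would otherwise fail fast.)

.. code-block:: yaml

  wait:
    resources:
      statefulset:
        allow_async_updates: true
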

 ChartGroup
 ----------

View File

@@ -124,6 +124,8 @@ Chart
 | dependencies    | object   | (optional) reference any chart dependencies before install                             |
 +-----------------+----------+---------------------------------------------------------------------------------------+

+.. _wait_v2:
+
 Wait
 ^^^^
@@ -132,8 +134,25 @@ Wait
 +=============+==========+====================================================================+
 | timeout     | int      | time (in seconds) to wait for chart to deploy                      |
 +-------------+----------+--------------------------------------------------------------------+
-| resources   | array    | Array of `Wait Resource`_ to wait on, with ``labels`` added to each|
-|             |          | item. Defaults to pods and jobs (if any exist) matching ``labels``.|
+| resources   | dict \|  | `Wait Resource`_ s to wait on. Defaults to all supported resource  |
+|             | array    | types (see `Wait Resource`_ ``.type``), with                       |
+|             |          | ``required: false``.                                               |
+|             |          |                                                                    |
+|             |          | **dict** - Maps resource types to one of:                          |
+|             |          |                                                                    |
+|             |          | - `Wait Resource`_ : config for resource type.                     |
+|             |          |                                                                    |
+|             |          | - list[ `Wait Resource`_ ] - multiple configs for resource type.   |
+|             |          |                                                                    |
+|             |          | - ``false`` - disable waiting for resource type.                   |
+|             |          |                                                                    |
+|             |          | Any resource types not overridden retain the default config.       |
+|             |          |                                                                    |
+|             |          | **array** - Lists all `Wait Resource`_ s to use, completely        |
+|             |          | overriding the default. Can be set to ``[]`` to disable all        |
+|             |          | resource types.                                                    |
+|             |          |                                                                    |
+|             |          | See also `Wait Resources Examples`_.                               |
 +-------------+----------+--------------------------------------------------------------------+
 | labels      | object   | Base mapping of labels to wait on. They are added to any labels in |
 |             |          | each item in the ``resources`` array.                              |
@@ -143,18 +162,91 @@ Wait
 Wait Resource
 ^^^^^^^^^^^^^

-+-------------+----------+--------------------------------------------------------------------+
-| keyword     | type     | action                                                             |
-+=============+==========+====================================================================+
-| type        | string   | k8s resource type, supports: controllers ('deployment',            |
-|             |          | 'daemonset', 'statefulset'), 'pod', 'job'                          |
-+-------------+----------+--------------------------------------------------------------------+
-| labels      | object   | mapping of kubernetes resource labels                              |
-+-------------+----------+--------------------------------------------------------------------+
-| min\_ready  | int      | Only for controller ``type``s. Amount of pods in a controller      |
-|             | string   | which must be ready. Can be integer or percent string e.g. ``80%``.|
-|             |          | Default ``100%``.                                                  |
-+-------------+----------+--------------------------------------------------------------------+
++----------------------------+----------+--------------------------------------------------------------------+
+| keyword                    | type     | action                                                             |
++============================+==========+====================================================================+
+| type                       | string   | K8s resource type, supports: 'deployment', 'daemonset',            |
+|                            |          | 'statefulset', 'pod', 'job'.                                       |
+|                            |          |                                                                    |
+|                            |          | NOTE: Omit when Wait_ ``.resources`` is a dict, as then the dict   |
+|                            |          | key is used instead.                                               |
++----------------------------+----------+--------------------------------------------------------------------+
+| labels                     | object   | Kubernetes labels specific to this resource.                       |
+|                            |          | Wait_ ``.labels`` are included with these, so only define this if  |
+|                            |          | additional labels are needed to identify the targeted resources.   |
++----------------------------+----------+--------------------------------------------------------------------+
+| min\_ready                 | int \|   | Only for controller ``type`` s. Amount of pods in a controller     |
+|                            | string   | which must be ready. Can be integer or percent string e.g. ``80%``.|
+|                            |          | Default ``100%``.                                                  |
++----------------------------+----------+--------------------------------------------------------------------+
+| allow\_async\_updates      | boolean  | Only for ``daemonset`` and ``statefulset`` ``type`` s. Whether to  |
+|                            |          | wait for async update strategies, i.e. OnDelete or partitioned     |
+|                            |          | RollingUpdate. Defaults to ``false`` in order to fail fast in      |
+|                            |          | cases where the async update is not expected to complete until     |
+|                            |          | some point later on.                                               |
++----------------------------+----------+--------------------------------------------------------------------+
+| required                   | boolean  | Whether to require the resource to be found.                       |
+|                            |          | Defaults to ``true`` for explicit items in ``wait.resources``.     |
+|                            |          | See ``wait.resources`` for its overall defaults.                   |
++----------------------------+----------+--------------------------------------------------------------------+
+Wait Resources Examples
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: yaml
+
+  wait:
+    # ...
+    # Disable all waiting.
+    native:
+      enabled: false
+    resources: []
+
+.. code-block:: yaml
+
+  wait:
+    # ...
+    # Disable waiting for a given type (job).
+    resources:
+      job: false
+
+.. code-block:: yaml
+
+  wait:
+    # ...
+    # Use min_ready < 100%.
+    resources:
+      daemonset:
+        min_ready: 80%
+
+.. code-block:: yaml
+
+  wait:
+    resources:
+      # Multiple configs for same type.
+      daemonset:
+        - labels:
+            component: one
+          min_ready: 80%
+        - labels:
+            component: two
+          min_ready: 50%
+
+.. code-block:: yaml
+
+  wait:
+    # ...
+    resources:
+      - type: daemonset
+        labels:
+          component: critical
+        min_ready: 100%
+      - type: daemonset
+        labels:
+          component: best_effort
+        min_ready: 80%
+      # ... (re-include any other resource types needed when using list)
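
(Editorial sketch, not part of the commit's example set: the ``required`` and
``allow_async_updates`` fields documented above can also be combined with the
dict form. The 75% value is arbitrary.)

.. code-block:: yaml

  wait:
    resources:
      job:
        required: true
      daemonset:
        allow_async_updates: true
        min_ready: 75%
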

 Wait Native
 ^^^^^^^^^^^
@@ -164,7 +256,7 @@ Config for the native ``helm (install|upgrade) --wait`` flag.
 +-------------+----------+--------------------------------------------------------------------+
 | keyword     | type     | action                                                             |
 +=============+==========+====================================================================+
-| enabled     | boolean  | defaults to true                                                   |
+| enabled     | boolean  | defaults to false                                                  |
 +-------------+----------+--------------------------------------------------------------------+

 .. _test_v2:
@@ -187,7 +279,7 @@ Run helm tests on the chart after install/upgrade.
 .. note::

     Armada will attempt to run helm tests by default. They may be disabled by
-    setting the ``enabled`` key to ``False``.
+    setting the ``enabled`` key to ``false``.

 Test Options
 ^^^^^^^^^^^^