Tolerate 404 response for non-existent resource
Armada raises an exception when the Kubernetes API returns a 404 response because a resource was not found. However, some resources, such as jobs, are "volatile": they may disappear after a few minutes. Armada may send a DELETE request to the Kubernetes API after a job has finished its lifecycle. In that situation the job is no longer found in the cluster, and this behavior causes Armada to break some application reapply operations. Therefore, a possible solution is to check, at least for the job resource type, whether the response returned from the Kubernetes API is a 404: if it is, just log the API response error reason and continue trying to delete the remaining jobs in the list; otherwise, raise the exception. Partial-Bug: 1948850 Signed-off-by: Iago Estrela <IagoFilipe.EstrelaBarros@windriver.com> Change-Id: I12ee8cef869e4443920d7f2e1899556e8940a977
This commit is contained in:
parent
b37417253f
commit
ddbdd7256c
@ -26,6 +26,7 @@ from hapi.services.tiller_pb2 import UninstallReleaseRequest
|
|||||||
from hapi.services.tiller_pb2 import UpdateReleaseRequest
|
from hapi.services.tiller_pb2 import UpdateReleaseRequest
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
from kubernetes.client.rest import ApiException
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from armada import const
|
from armada import const
|
||||||
@ -314,17 +315,17 @@ class Tiller(object):
|
|||||||
for action in actions.get('update', []):
|
for action in actions.get('update', []):
|
||||||
name = action.get('name')
|
name = action.get('name')
|
||||||
LOG.info('Updating %s ', name)
|
LOG.info('Updating %s ', name)
|
||||||
action_type = action.get('type')
|
resource_type = action.get('type')
|
||||||
labels = action.get('labels')
|
labels = action.get('labels')
|
||||||
|
|
||||||
self.rolling_upgrade_pod_deployment(
|
self.rolling_upgrade_pod_deployment(
|
||||||
name, release_name, namespace, labels, action_type, chart,
|
name, release_name, namespace, labels, resource_type,
|
||||||
disable_hooks, values, timeout)
|
chart, disable_hooks, values, timeout)
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(
|
LOG.exception(
|
||||||
"Pre-action failure: could not perform rolling upgrade for "
|
"Pre-action failure: could not perform rolling upgrade for "
|
||||||
"%(res_type)s %(res_name)s.", {
|
"%(res_type)s %(res_name)s.", {
|
||||||
'res_type': action_type,
|
'res_type': resource_type,
|
||||||
'res_name': name
|
'res_name': name
|
||||||
})
|
})
|
||||||
raise ex.PreUpdateJobDeleteException(name, namespace)
|
raise ex.PreUpdateJobDeleteException(name, namespace)
|
||||||
@ -332,16 +333,16 @@ class Tiller(object):
|
|||||||
try:
|
try:
|
||||||
for action in actions.get('delete', []):
|
for action in actions.get('delete', []):
|
||||||
name = action.get('name')
|
name = action.get('name')
|
||||||
action_type = action.get('type')
|
resource_type = action.get('type')
|
||||||
labels = action.get('labels', None)
|
labels = action.get('labels', None)
|
||||||
|
|
||||||
self.delete_resources(
|
self.delete_resources(
|
||||||
action_type, labels, namespace, timeout=timeout)
|
resource_type, labels, namespace, timeout=timeout)
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(
|
LOG.exception(
|
||||||
"Pre-action failure: could not delete %(res_type)s "
|
"Pre-action failure: could not delete %(res_type)s "
|
||||||
"%(res_name)s.", {
|
"%(res_name)s.", {
|
||||||
'res_type': action_type,
|
'res_type': resource_type,
|
||||||
'res_name': name
|
'res_name': name
|
||||||
})
|
})
|
||||||
raise ex.PreUpdateJobDeleteException(name, namespace)
|
raise ex.PreUpdateJobDeleteException(name, namespace)
|
||||||
@ -617,13 +618,80 @@ class Tiller(object):
|
|||||||
status = self.get_release_status(release)
|
status = self.get_release_status(release)
|
||||||
raise ex.ReleaseException(release, status, 'Delete')
|
raise ex.ReleaseException(release, status, 'Delete')
|
||||||
|
|
||||||
|
def _delete_jobs(self, jobs, resource_labels, namespace, timeout):
    '''
    Delete each job in ``jobs.items``, tolerating jobs that no longer exist.

    A 404 from the Kubernetes API for one job is logged as a warning and
    the loop continues with the remaining jobs. Any other API error is
    propagated unchanged.

    :param jobs: object whose ``items`` attribute is iterated; each item
        must expose ``metadata.name`` (presumably the job list returned by
        ``self.k8s.get_namespace_job`` - confirm against the caller)
    :param resource_labels: labels used to select the jobs; only used in
        the warning message
    :param namespace: namespace passed to ``self.k8s.delete_job_action``
    :param timeout: forwarded as ``timeout=`` to
        ``self.k8s.delete_job_action``
    :raises ApiException: the original exception, when the API call fails
        with a status other than 404
    '''
    for jb in jobs.items:
        jb_name = jb.metadata.name
        LOG.info("Deleting job: %s in namespace: %s", jb_name, namespace)
        try:
            self.k8s.delete_job_action(jb_name, namespace, timeout=timeout)
        except ApiException as err:
            if err.status != 404:
                # Bare raise keeps the caught exception (status, reason,
                # body, traceback). `raise ApiException` would raise a
                # new, empty instance instead.
                raise
            # The job vanished between listing and deletion; keep going.
            LOG.warn(
                "No jobs found with labels=%s namespace=%s",
                resource_labels, namespace)
|
||||||
|
|
||||||
|
def _delete_cronjobs(
        self,
        cronjobs,
        resource_labels,
        namespace,
        timeout,
        implied_cronjob=False):
    '''
    Delete each cronjob in ``cronjobs.items``, tolerating missing ones.

    A 404 from the Kubernetes API for one cronjob is logged as a warning
    and the loop continues with the remaining cronjobs. Any other API
    error is propagated unchanged.

    :param cronjobs: object whose ``items`` attribute is iterated; each
        item must expose ``metadata.name``
    :param resource_labels: labels used to select the cronjobs; only used
        in the warning message
    :param namespace: namespace passed to
        ``self.k8s.delete_cron_job_action``
    :param timeout: forwarded as ``timeout=`` to
        ``self.k8s.delete_cron_job_action``
    :param implied_cronjob: when true, a deprecation warning is logged for
        every cronjob before it is deleted
    :raises ApiException: the original exception, when the API call fails
        with a status other than 404
    '''
    for jb in cronjobs.items:
        jb_name = jb.metadata.name

        # TODO: Remove when v1 doc support is removed.
        if implied_cronjob:
            LOG.warn(
                "Deleting cronjobs via `type: job` is "
                "deprecated, use `type: cronjob` instead")

        LOG.info("Deleting cronjob %s in namespace: %s", jb_name, namespace)
        try:
            self.k8s.delete_cron_job_action(
                jb_name, namespace, timeout=timeout)
        except ApiException as err:
            if err.status != 404:
                # Bare raise keeps the caught exception (status, reason,
                # body, traceback). `raise ApiException` would raise a
                # new, empty instance instead.
                raise
            # The cronjob vanished between listing and deletion; keep going.
            LOG.warn(
                "No cronjobs found with labels=%s namespace=%s",
                resource_labels, namespace)
|
||||||
|
|
||||||
|
def _delete_pods(
        self, release_pods, resource_labels, namespace, timeout,
        wait=False):
    '''
    Delete each pod in ``release_pods.items``, tolerating missing ones.

    A 404 from the Kubernetes API for one pod (raised by the delete call
    or by the optional redeployment wait) is logged as a warning and the
    loop continues with the remaining pods. Any other API error is
    propagated unchanged.

    :param release_pods: object whose ``items`` attribute is iterated;
        each item must expose ``metadata.name``
    :param resource_labels: labels used to select the pods; only used in
        the warning message
    :param namespace: namespace passed to the ``self.k8s`` calls
    :param timeout: forwarded as ``timeout=`` to
        ``self.k8s.delete_pod_action``
    :param wait: when true, ``self.k8s.wait_for_pod_redeployment`` is
        called after each deletion
    :raises ApiException: the original exception, when an API call fails
        with a status other than 404
    '''
    for pod in release_pods.items:
        pod_name = pod.metadata.name
        LOG.info("Deleting pod %s in namespace: %s", pod_name, namespace)
        try:
            self.k8s.delete_pod_action(pod_name, namespace, timeout=timeout)
            if wait:
                self.k8s.wait_for_pod_redeployment(pod_name, namespace)
        except ApiException as err:
            if err.status != 404:
                # Bare raise keeps the caught exception (status, reason,
                # body, traceback). `raise ApiException` would raise a
                # new, empty instance instead.
                raise
            # The pod vanished between listing and deletion; keep going.
            LOG.warn(
                "No pods found with labels=%s namespace=%s",
                resource_labels, namespace)
|
||||||
|
|
||||||
|
def _job_implies_cronjob(self, resource_type):
    '''
    Tell whether a ``job`` resource type should also cover cronjobs.

    The schema of the current chart is always looked up first. The result
    is true only when ``resource_type`` is ``'job'`` and that schema's
    version is lower than 2.

    :param resource_type: resource type string from the delete action
    :returns: ``True`` when cronjobs are implied, ``False`` otherwise
    '''
    # Resolve the schema version unconditionally, before looking at
    # resource_type, so the lookup always happens.
    current_schema = get_current_chart()['schema']
    is_pre_v2 = schema.get_schema_info(current_schema).version < 2
    return resource_type == 'job' and is_pre_v2
|
||||||
|
|
||||||
def delete_resources(
|
def delete_resources(
|
||||||
self,
|
self,
|
||||||
resource_type,
|
resource_type,
|
||||||
resource_labels,
|
resource_labels,
|
||||||
namespace,
|
namespace,
|
||||||
wait=False,
|
wait=False,
|
||||||
timeout=const.DEFAULT_TILLER_TIMEOUT):
|
timeout=const.DEFAULT_TILLER_TIMEOUT,
|
||||||
|
implied_job_check=True):
|
||||||
'''
|
'''
|
||||||
Delete resources matching provided resource type, labels, and
|
Delete resources matching provided resource type, labels, and
|
||||||
namespace.
|
namespace.
|
||||||
@ -643,50 +711,32 @@ class Tiller(object):
|
|||||||
|
|
||||||
handled = False
|
handled = False
|
||||||
if resource_type == 'job':
|
if resource_type == 'job':
|
||||||
get_jobs = self.k8s.get_namespace_job(
|
jobs = self.k8s.get_namespace_job(
|
||||||
namespace, label_selector=label_selector)
|
namespace, label_selector=label_selector)
|
||||||
for jb in get_jobs.items:
|
self._delete_jobs(jobs, resource_labels, namespace, timeout)
|
||||||
jb_name = jb.metadata.name
|
|
||||||
|
|
||||||
LOG.info(
|
|
||||||
"Deleting job: %s in namespace: %s", jb_name, namespace)
|
|
||||||
self.k8s.delete_job_action(jb_name, namespace, timeout=timeout)
|
|
||||||
handled = True
|
handled = True
|
||||||
|
|
||||||
# TODO: Remove when v1 doc support is removed.
|
# TODO: Remove when v1 doc support is removed.
|
||||||
chart = get_current_chart()
|
implied_cronjob = False
|
||||||
schema_info = schema.get_schema_info(chart['schema'])
|
if implied_job_check:
|
||||||
job_implies_cronjob = schema_info.version < 2
|
implied_cronjob = self._job_implies_cronjob(resource_type)
|
||||||
implied_cronjob = resource_type == 'job' and job_implies_cronjob
|
|
||||||
|
|
||||||
if resource_type == 'cronjob' or implied_cronjob:
|
if resource_type == 'cronjob' or implied_cronjob:
|
||||||
get_jobs = self.k8s.get_namespace_cron_job(
|
cronjobs = self.k8s.get_namespace_cron_job(
|
||||||
namespace, label_selector=label_selector)
|
namespace, label_selector=label_selector)
|
||||||
for jb in get_jobs.items:
|
self._delete_cronjobs(
|
||||||
jb_name = jb.metadata.name
|
cronjobs,
|
||||||
|
resource_labels,
|
||||||
# TODO: Remove when v1 doc support is removed.
|
namespace,
|
||||||
if implied_cronjob:
|
timeout,
|
||||||
LOG.warn(
|
implied_cronjob=implied_cronjob)
|
||||||
"Deleting cronjobs via `type: job` is "
|
|
||||||
"deprecated, use `type: cronjob` instead")
|
|
||||||
|
|
||||||
LOG.info(
|
|
||||||
"Deleting cronjob %s in namespace: %s", jb_name, namespace)
|
|
||||||
self.k8s.delete_cron_job_action(jb_name, namespace)
|
|
||||||
handled = True
|
handled = True
|
||||||
|
|
||||||
if resource_type == 'pod':
|
if resource_type == 'pod':
|
||||||
release_pods = self.k8s.get_namespace_pod(
|
release_pods = self.k8s.get_namespace_pod(
|
||||||
namespace, label_selector=label_selector)
|
namespace, label_selector=label_selector)
|
||||||
for pod in release_pods.items:
|
self._delete_pods(
|
||||||
pod_name = pod.metadata.name
|
release_pods, resource_labels, namespace, timeout, wait=wait)
|
||||||
|
|
||||||
LOG.info(
|
|
||||||
"Deleting pod %s in namespace: %s", pod_name, namespace)
|
|
||||||
self.k8s.delete_pod_action(pod_name, namespace)
|
|
||||||
if wait:
|
|
||||||
self.k8s.wait_for_pod_redeployment(pod_name, namespace)
|
|
||||||
handled = True
|
handled = True
|
||||||
|
|
||||||
if not handled:
|
if not handled:
|
||||||
@ -700,7 +750,7 @@ class Tiller(object):
|
|||||||
release_name,
|
release_name,
|
||||||
namespace,
|
namespace,
|
||||||
resource_labels,
|
resource_labels,
|
||||||
action_type,
|
resource_type,
|
||||||
chart,
|
chart,
|
||||||
disable_hooks,
|
disable_hooks,
|
||||||
values,
|
values,
|
||||||
@ -709,9 +759,9 @@ class Tiller(object):
|
|||||||
update statefulsets (daemon, stateful)
|
update statefulsets (daemon, stateful)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
if action_type == 'daemonset':
|
if resource_type == 'daemonset':
|
||||||
|
|
||||||
LOG.info('Updating: %s', action_type)
|
LOG.info('Updating: %s', resource_type)
|
||||||
|
|
||||||
label_selector = ''
|
label_selector = ''
|
||||||
|
|
||||||
@ -726,7 +776,7 @@ class Tiller(object):
|
|||||||
ds_labels = ds.metadata.labels
|
ds_labels = ds.metadata.labels
|
||||||
if ds_name == name:
|
if ds_name == name:
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Deleting %s : %s in %s", action_type, ds_name,
|
"Deleting %s : %s in %s", resource_type, ds_name,
|
||||||
namespace)
|
namespace)
|
||||||
self.k8s.delete_daemon_action(ds_name, namespace)
|
self.k8s.delete_daemon_action(ds_name, namespace)
|
||||||
|
|
||||||
@ -750,7 +800,8 @@ class Tiller(object):
|
|||||||
timeout=timeout)
|
timeout=timeout)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
LOG.error("Unable to exectue name: % type: %s", name, action_type)
|
LOG.error(
|
||||||
|
"Unable to execute name: % type: %s", name, resource_type)
|
||||||
|
|
||||||
def rollback_release(
|
def rollback_release(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user