Armada: improve logging, uplift dependency

This PS:

1) Improves specific logging in Armada, so that
it is easier to debug deployment-related issues
2) Uplifts the k8s Python dependency to 12.0.0
3) Enforces 'watch' timeouts more strictly, as the call to
the Kubernetes Python watch function seemed unreliable.
4) Adds a field selector to the 'watch' stream to look for
the DELETE action to have been completed on the specific
pod/job/cronjob, rather than looking across the whole
namespace or via labels. This will narrow what the watch
is looking at, making the logs less busy.

Change-Id: I1952b0db32fb0b56ffffcddeae0532beb5a27b67
This commit is contained in:
DeJaeger, Darren (dd118r) 2021-05-13 14:58:32 -04:00
parent 973338590f
commit 9aadc14777
3 changed files with 34 additions and 22 deletions

View File

@@ -13,6 +13,7 @@
# limitations under the License.
import re
import time
from kubernetes import client
from kubernetes import config
@@ -151,34 +152,45 @@ class K8s(object):
timeout = self._check_timeout(timeout)
LOG.debug(
'Watching to delete %s %s, Wait timeout=%s',
object_type_description, name, timeout)
'Watching to delete %s: %s in namespace=%s (wait timeout=%s)',
object_type_description, name, namespace, timeout)
body = client.V1DeleteOptions(
propagation_policy=propagation_policy)
w = watch.Watch()
issue_delete = True
found_events = False
for event in w.stream(list_func, namespace=namespace,
timeout_seconds=timeout):
if issue_delete:
delete_func(name=name, namespace=namespace, body=body)
issue_delete = False
event_type = event['type'].upper()
item_name = event['object'].metadata.name
LOG.debug('Watch event %s on %s', event_type, item_name)
deadline = round(time.time() + timeout)
while timeout > 0:
for event in w.stream(
list_func, namespace=namespace,
field_selector='metadata.name={}'.format(name),
timeout_seconds=timeout):
if issue_delete:
delete_func(name=name, namespace=namespace, body=body)
issue_delete = False
if item_name == name:
found_events = True
if event_type == 'DELETED':
LOG.info(
'Successfully deleted %s %s',
object_type_description, item_name)
return
event_type = event['type'].upper()
item = event['object']
item_name = item.metadata.name
LOG.debug(
'Watch event seen: type=%s, name=%s, '
'namespace=%s (waiting on %s: %s)', event_type,
item_name, namespace, object_type_description, name)
if item_name == name:
found_events = True
if event_type == 'DELETED':
LOG.info(
'Successfully deleted %s: %s in namespace=%s',
object_type_description, item_name, namespace)
return
timeout = round(deadline - time.time())
if not found_events:
LOG.warn(
'Saw no delete events for %s %s in namespace=%s',
'Saw no events for %s: %s in namespace=%s',
object_type_description, name, namespace)
err_msg = (

View File

@@ -638,8 +638,8 @@ class Tiller(object):
if resource_labels is not None:
label_selector = label_selectors(resource_labels)
LOG.debug(
"Deleting resources in namespace %s matching "
"selectors (%s).", namespace, label_selector)
"Deleting resources in namespace: %s, matching "
"selectors: %s (timeout=%s).", namespace, label_selector, timeout)
handled = False
if resource_type == 'job':
@@ -649,7 +649,7 @@
jb_name = jb.metadata.name
LOG.info(
"Deleting job %s in namespace: %s", jb_name, namespace)
"Deleting job: %s in namespace: %s", jb_name, namespace)
self.k8s.delete_job_action(jb_name, namespace, timeout=timeout)
handled = True

View File

@@ -6,7 +6,7 @@ jsonschema>=3.0.1<4
keystoneauth1>=3.18.0
keystonemiddleware==5.3.0
kombu<4.7,>=4.6.10
kubernetes>=11.0.0
kubernetes>=12.0.0
Paste>=2.0.3
PasteDeploy>=1.5.2
protobuf>=3.4.0