tripleo_container_manage: improve logging for failed containers

If a container fails to start after many retries, the default logging of
the async_status tasks isn't helpful and it's hard to figure out which
container failed to start.

In this patch, we introduce a new filter that reads the
async_results and builds a list of containers which failed to start
(failed set to True) or did not finish starting (finished set to 0); the
async_status task ignores errors, but we fail a bit later, after building
that list.

Change-Id: I5a2270130bdf5b9d781f4d81ec25c6ccf12fdc07
This commit is contained in:
Emilien Macchi 2020-03-20 17:54:35 -04:00
parent 1a5b27c457
commit 62fcbf2925
3 changed files with 74 additions and 1 deletions

View File

@ -42,7 +42,8 @@ class FilterModule(object):
'recursive_get_key_from_dict': self.recursive_get_key_from_dict,
'get_role_assignments': self.get_role_assignments,
'get_domain_id': self.get_domain_id,
'get_changed_containers': self.get_changed_containers
'get_changed_containers': self.get_changed_containers,
'get_failed_containers': self.get_failed_containers
}
def subsort(self, dict_to_sort, attribute, null_value=0):
@ -387,3 +388,17 @@ class FilterModule(object):
if item['container'].get('Name'):
changed.append(item['container'].get('Name'))
return changed
def get_failed_containers(self, async_results):
    """Return a list of containers that failed to start on time.

    This filter takes as input the async results of a podman_container
    invocation and returns the list of containers that did not finish
    correctly.

    :param async_results: list of per-container result dicts. Each entry
        is expected to carry a ``container_data`` dict keyed by container
        name and, normally, ``failed`` and ``finished`` status keys.
    :returns: list of container names whose result is marked as failed
        or as not finished, in input order.
    """
    failed = []
    for item in async_results:
        # Entries without status keys (presumably skipped tasks, e.g. when
        # running in check mode -- confirm against callers) are not
        # reported as failures, instead of raising KeyError.
        if item.get('failed', False) or not item.get('finished', True):
            # Iterating the dict yields its keys, i.e. the container names.
            failed.extend(item['container_data'])
    return failed

View File

@ -86,6 +86,8 @@
register: create_async_poll_results
until: create_async_poll_results.finished
retries: 60
# We fail later if a container has failed to start
failed_when: false
when:
- not ansible_check_mode|bool
@ -94,6 +96,13 @@
# Derive two facts from the registered async poll results: the names of the
# containers reported by the get_changed_containers filter, and the names of
# the containers whose result is failed or not finished (get_failed_containers).
- name: "Create a list of containers which changed"
set_fact:
containers_changed: "{{ create_async_poll_results.results | get_changed_containers }}"
containers_failed: "{{ create_async_poll_results.results | get_failed_containers }}"
# Despite its name, this task does not just print: it fails the play when
# containers_failed is non-empty, listing the affected containers in the
# error message.
- name: Print the containers that failed to start
fail:
msg: "{{ containers_failed }} failed to start, check logs in /var/log/containers/stdouts/"
when:
- containers_failed|length != 0
- name: Block for container commands
when:

View File

@ -820,3 +820,52 @@ class TestHelperFilters(tests_base.TestCase):
expected_list = ['mysql']
result = self.filters.get_changed_containers(data)
self.assertEqual(result, expected_list)
def test_get_failed_containers(self):
data = [
{
"ansible_job_id": "948704694230.17597",
"ansible_loop_var": "container_data",
"changed": True,
"container_data": {
"haproxy": {
"image": "haproxy:latest",
}
},
"failed": False,
"finished": 1,
"results_file": "/root/.ansible_async/948704694230.17597",
"started": 1
},
{
"ansible_job_id": "948704694230.17597",
"ansible_loop_var": "container_data",
"changed": True,
"container_data": {
"memcached": {
"image": "memcached:latest",
}
},
"failed": True,
"finished": 1,
"results_file": "/root/.ansible_async/948704694230.17597",
"started": 1
},
{
"ansible_job_id": "316140143697.17616",
"ansible_loop_var": "container_data",
"changed": True,
"container_data": {
"mysql": {
"image": "mysql:latest",
}
},
"failed": False,
"finished": 0,
"results_file": "/root/.ansible_async/316140143697.17616",
"started": 1
}
]
expected_list = ['memcached', 'mysql']
result = self.filters.get_failed_containers(data)
self.assertEqual(result, expected_list)