tripleo_container_manage: improve logging for failed containers
If a container fails to start after many retries, the default logging of the async_status tasks isn't great and it's hard to figure out which container failed to start. In this patch, we introduce a new filter that reads the async_results and builds a list of containers which failed to start (`failed` set to True) or did not finish starting (`finished` set to 0); the async_status task ignores errors, but we fail a bit later, after building that list. Change-Id: I5a2270130bdf5b9d781f4d81ec25c6ccf12fdc07
This commit is contained in:
parent
1a5b27c457
commit
62fcbf2925
|
@ -42,7 +42,8 @@ class FilterModule(object):
|
|||
'recursive_get_key_from_dict': self.recursive_get_key_from_dict,
|
||||
'get_role_assignments': self.get_role_assignments,
|
||||
'get_domain_id': self.get_domain_id,
|
||||
'get_changed_containers': self.get_changed_containers
|
||||
'get_changed_containers': self.get_changed_containers,
|
||||
'get_failed_containers': self.get_failed_containers
|
||||
}
|
||||
|
||||
def subsort(self, dict_to_sort, attribute, null_value=0):
|
||||
|
@ -387,3 +388,17 @@ class FilterModule(object):
|
|||
if item['container'].get('Name'):
|
||||
changed.append(item['container'].get('Name'))
|
||||
return changed
|
||||
|
||||
def get_failed_containers(self, async_results):
    """Return a list of containers that failed to start on time.

    This filter takes as input the async results of a podman_container
    invocation and returns the names of the containers that did not
    finish correctly.

    :param async_results: iterable of per-item async result dicts. Each
        entry is inspected for its 'failed', 'finished' and
        'container_data' keys.
    :returns: list with the keys of 'container_data' for every entry that
        is marked as failed or as not finished, in input order.
    """
    failed = []
    for item in async_results:
        # An entry without 'failed'/'finished' keys is treated as
        # "not failed" instead of raising KeyError.
        # NOTE(review): presumably this happens for skipped loop items
        # (e.g. check mode) -- confirm against the calling tasks.
        if item.get('failed', False) or not item.get('finished', True):
            # Only the container names (the dict keys) are reported.
            failed.extend(item['container_data'])
    return failed
|
||||
|
|
|
@ -86,6 +86,8 @@
|
|||
register: create_async_poll_results
|
||||
until: create_async_poll_results.finished
|
||||
retries: 60
|
||||
# We fail later if a container has failed to start
|
||||
failed_when: false
|
||||
when:
|
||||
- not ansible_check_mode|bool
|
||||
|
||||
|
@ -94,6 +96,13 @@
|
|||
# Derives two facts from create_async_poll_results.results: containers_changed
# (via the get_changed_containers filter) and containers_failed (via the
# get_failed_containers filter).
# NOTE(review): the task name only mentions the changed containers.
- name: "Create a list of containers which changed"
|
||||
set_fact:
|
||||
containers_changed: "{{ create_async_poll_results.results | get_changed_containers }}"
|
||||
containers_failed: "{{ create_async_poll_results.results | get_failed_containers }}"
|
||||
|
||||
# Fails the play with a message listing containers_failed; the task only
# runs when that list is not empty.
- name: Print the containers that failed to start
|
||||
fail:
|
||||
msg: "{{ containers_failed }} failed to start, check logs in /var/log/containers/stdouts/"
|
||||
when:
|
||||
- containers_failed|length != 0
|
||||
|
||||
- name: Block for container commands
|
||||
when:
|
||||
|
|
|
@ -820,3 +820,52 @@ class TestHelperFilters(tests_base.TestCase):
|
|||
expected_list = ['mysql']
|
||||
result = self.filters.get_changed_containers(data)
|
||||
self.assertEqual(result, expected_list)
|
||||
|
||||
def test_get_failed_containers(self):
    """Only failed or unfinished async results are reported as failed."""
    # Finished without failure: must not be reported.
    haproxy_ok = {
        "ansible_job_id": "948704694230.17597",
        "ansible_loop_var": "container_data",
        "changed": True,
        "container_data": {
            "haproxy": {
                "image": "haproxy:latest",
            }
        },
        "failed": False,
        "finished": 1,
        "results_file": "/root/.ansible_async/948704694230.17597",
        "started": 1
    }
    # Finished but flagged as failed: must be reported.
    memcached_failed = {
        "ansible_job_id": "948704694230.17597",
        "ansible_loop_var": "container_data",
        "changed": True,
        "container_data": {
            "memcached": {
                "image": "memcached:latest",
            }
        },
        "failed": True,
        "finished": 1,
        "results_file": "/root/.ansible_async/948704694230.17597",
        "started": 1
    }
    # Not failed but never finished: must be reported.
    mysql_unfinished = {
        "ansible_job_id": "316140143697.17616",
        "ansible_loop_var": "container_data",
        "changed": True,
        "container_data": {
            "mysql": {
                "image": "mysql:latest",
            }
        },
        "failed": False,
        "finished": 0,
        "results_file": "/root/.ansible_async/316140143697.17616",
        "started": 1
    }
    async_results = [haproxy_ok, memcached_failed, mysql_unfinished]
    failed_names = self.filters.get_failed_containers(async_results)
    self.assertEqual(failed_names, ['memcached', 'mysql'])
|
||||
|
|
Loading…
Reference in New Issue