Collect logs: Handle errors and timeout
Currently, if the collect logs script hits an error or times out, none of the copy tasks are run, and we may end up with no logs at all, not even those of the log collection itself. This patch encloses the log collection within a block to handle errors, and runs the script with a specified timeout, so that there is always some time left for copying whatever logs we have from the node to the executor. Change-Id: I428bd0aa8e35a2a94f2cf3039dd9e3ae683334a6
This commit is contained in:
parent
101074b2e8
commit
7f9e1017b0
@ -8,6 +8,10 @@
|
|||||||
- name: Collect logs
|
- name: Collect logs
|
||||||
hosts: primary
|
hosts: primary
|
||||||
tasks:
|
tasks:
|
||||||
|
- name: set collection timeout
|
||||||
|
set_fact:
|
||||||
|
collect_timeout_sec: "{{ zuul.post_timeout|default(3600) - copy_logs_time|default(300) }}"
|
||||||
|
|
||||||
- name: Copy zuul_console_json log to workspace for reproducer
|
- name: Copy zuul_console_json log to workspace for reproducer
|
||||||
copy:
|
copy:
|
||||||
content: "{{ hostvars['localhost'].zuul_console_json }}"
|
content: "{{ hostvars['localhost'].zuul_console_json }}"
|
||||||
@ -36,12 +40,33 @@
|
|||||||
fi
|
fi
|
||||||
when: environment_type != "ovb" or not undercloud_logs.stat.exists
|
when: environment_type != "ovb" or not undercloud_logs.stat.exists
|
||||||
|
|
||||||
|
- name: Check script existence
|
||||||
|
stat:
|
||||||
|
path: "{{ ansible_user_dir }}/workspace/logs/collect_logs.sh"
|
||||||
|
register: collect_logs_path
|
||||||
|
|
||||||
- hosts:
|
- name: Collect logs with a timeout
|
||||||
- primary
|
block:
|
||||||
- centos-7
|
- name: Run ansible playbook to collect logs
|
||||||
tasks:
|
shell: |
|
||||||
|
timeout --preserve-status -s 15 \
|
||||||
|
-k {{ [collect_timeout_sec|int, 60]|sum|string }} {{ collect_timeout_sec|string }} \
|
||||||
|
bash {{ ansible_user_dir }}/workspace/logs/collect_logs.sh
|
||||||
|
when: collect_logs_path.stat.exists
|
||||||
|
register: collect_logs_run
|
||||||
|
|
||||||
|
rescue:
|
||||||
|
- name: warn when collect logs timed out (SIGTERM or SIGKILL used)
|
||||||
|
debug:
|
||||||
|
msg: "ERROR: Collect logs timed out"
|
||||||
|
when: collect_logs_path.stat.exists and (collect_logs_run.rc == 143 or collect_logs_run.rc == 137)
|
||||||
|
|
||||||
|
- name: warn when collect logs failed
|
||||||
|
debug:
|
||||||
|
msg: "ERROR: Collect logs failed, please check the logs"
|
||||||
|
when: collect_logs_path.stat.exists and collect_logs_run.rc != 143 and collect_logs_run.rc != 137
|
||||||
|
|
||||||
|
always:
|
||||||
- name: Ensure artifacts directory exists
|
- name: Ensure artifacts directory exists
|
||||||
file:
|
file:
|
||||||
path: '{{ zuul.executor.work_root }}/artifacts'
|
path: '{{ zuul.executor.work_root }}/artifacts'
|
||||||
|
Loading…
Reference in New Issue
Block a user