zuul-jobs/roles/check-pod-restarts/tasks/main.yaml
jmarchel 49ebb72960 Improve the CI check for pod liveness to fail on pods that are repeatedly restarting
Change-Id: Ic7d638c090c108efc70a8a9d5f417fbf0ca84795
2024-02-09 21:35:48 +00:00

16 lines
637 B
YAML

# Collect one line per pod in the form "<pod-name>:<count> [<count> ...]",
# where each count is the restartCount of one of the pod's containers.
# No namespace flag is passed, so kubectl queries whatever namespace the
# current context defaults to.
- name: Get pods and their restart counts
  shell: >
    kubectl get pods -o=jsonpath="{range .items[*]}{.metadata.name}:{.status.containerStatuses[*].restartCount}{'\n'}{end}"
  register: pods_restart_counts
  # Read-only query: never report this task as having changed the host.
  changed_when: false
# Build the list of pod names whose containers restarted too often.
# Each stdout line looks like "<pod-name>:<count> [<count> ...]"; a pod with
# several containers yields several space-separated counts, so every count is
# checked individually (casting the whole "0 5" string to int would give 0 and
# hide the restarting container).
# The limit defaults to 3 and may be overridden through the optional
# check_pod_restarts_threshold variable.
- name: Check for pods that have restarted more than the allowed threshold
  set_fact:
    unstable_pods: "{{ unstable_pods | default([]) + [item.split(':')[0]] }}"
  loop: "{{ pods_restart_counts.stdout_lines }}"
  when:
    # Skip blank or unexpected lines that carry no "name:counts" separator.
    - "':' in item"
    # Flag the pod as soon as any single container reaches the limit.
    - >-
      item.split(':')[1].split() | map('int')
      | select('ge', check_pod_restarts_threshold | default(3) | int)
      | list | length > 0
# Fail the play when the unstable_pods fact holds at least one pod name.
- name: Report if any pod has restarted too many times
  when:
    # The fact may be unset; in that case there is nothing to report.
    - unstable_pods is defined
    - unstable_pods | length > 0
  fail:
    msg: "There were some unstable pods: {{ unstable_pods }}"