Improve the CI check for pod liveness to fail on pods that are repeatedly restarting

Change-Id: Ic7d638c090c108efc70a8a9d5f417fbf0ca84795
This commit is contained in:
jmarchel 2024-02-01 11:05:01 +01:00 committed by Radosław Piliszek
parent 5464c4c413
commit 49ebb72960
3 changed files with 31 additions and 0 deletions

15
playbooks/helm/run.yaml Normal file
View File

@ -0,0 +1,15 @@
# Run playbook for the helm job: process every configured chart through the
# helm-template role, then run the check-pod-restarts role.
- hosts: all
  tasks:
    # helm_charts is consumed as a mapping: each key becomes the release name
    # and each value the chart passed to the helm-template role.
    - name: Install helm charts
      loop: "{{ helm_charts | dict2items }}"
      loop_control:
        # Explicit loop variable so the included role is free to use `item`.
        loop_var: zj_item
      include_role:
        name: helm-template
      vars:
        helm_release_name: "{{ zj_item.key }}"
        helm_chart: "{{ zj_item.value }}"

    # Runs once, after all charts above have been processed.
    - name: Check pod restarts after helm chart installations
      include_role:
        name: check-pod-restarts

View File

@ -0,0 +1,15 @@
# Fail the play when any pod has a container that keeps restarting.
#
# Optional variable:
#   check_pod_restarts_threshold: restart count at or above which a pod is
#     reported as unstable (defaults to 3).

# Emits one "<pod-name>:<count> [<count> ...]" line per pod. A pod with
# several containers yields several space-separated counts; a pod without
# container statuses yields an empty count field.
- name: Get pods and their restart counts
  shell: >
    kubectl get pods -o=jsonpath="{range .items[*]}{.metadata.name}:{.status.containerStatuses[*].restartCount}{'\n'}{end}"
  register: pods_restart_counts
  # Read-only query: never report this task as "changed".
  changed_when: false

# The count field is split on whitespace and the highest per-container value
# is compared against the threshold. Applying `int` to the whole field would
# turn "0 7" into 0 and hide unstable multi-container pods. The trailing
# `+ [0]` keeps `max` well-defined when the count field is empty.
- name: Check for pods that have restarted more than the allowed threshold
  set_fact:
    unstable_pods: "{{ unstable_pods | default([]) + [item.split(':')[0]] }}"
  loop: "{{ pods_restart_counts.stdout_lines }}"
  when: >-
    (((item.split(':')[1].split() | map('int') | list) + [0]) | max)
    >= (check_pod_restarts_threshold | default(3) | int)

# unstable_pods is only defined when at least one pod crossed the threshold,
# hence the default([]) guard.
- name: Report if any pod has restarted too many times
  fail:
    msg: "There were some unstable pods: {{ unstable_pods }}"
  when: unstable_pods | default([]) | length > 0

View File

@ -22,6 +22,7 @@
roles:
- zuul: zuul/zuul-jobs
pre-run: playbooks/helm/pre.yaml
run: playbooks/helm/run.yaml
post-run: playbooks/kubernetes/post.yaml
vars:
helm_version: *helm_version