tripleo-validations/roles/ceph/tasks/ceph-health.yaml

91 lines
3.3 KiB
YAML

---
- name: Check if ceph_mon is deployed
become: true
shell: hiera -c /etc/puppet/hiera.yaml enabled_services | egrep -sq ceph_mon
ignore_errors: true
register: ceph_mon_enabled
changed_when: false
delegate_to: "{{ tripleo_delegate_to | first | default(omit) }}"
- when: "ceph_mon_enabled is succeeded"
delegate_to: "{{ tripleo_delegate_to | first | default(omit) }}"
block:
- name: Check for docker cli
stat:
path: "/var/run/docker.sock"
register: check_docker_cli
check_mode: false
- name: Set container_client fact
set_fact:
container_client: |-
{% set container_client = 'podman' %}
{% if check_docker_cli.stat.exists|bool %}
{% set container_client = 'docker' %}
{% endif %}
{{ container_client }}
- name: Set container filter format
set_fact:
container_filter_format: !unsafe "--format '{{ .Names }}'"
- name: Set ceph_mon_container name
become: true
shell: "{{ container_client }} ps {{ container_filter_format }} | grep ceph-mon"
register: ceph_mon_container
changed_when: false
- name: Set ceph cluster name
become: true
shell: find /etc/ceph -name '*.conf' -prune -print -quit | xargs basename -s '.conf'
register: ceph_cluster_name
changed_when: false
- name: Get ceph health
become: true
shell: "{{ container_client }} exec {{ ceph_mon_container.stdout }} ceph --cluster {{ ceph_cluster_name.stdout }} health | awk '{print $1}'"
register: ceph_health
- name: Check ceph health
warn:
msg: Ceph is in {{ ceph_health.stdout }} state.
when:
- ceph_health.stdout != 'HEALTH_OK'
- not fail_on_ceph_health_err|default(true)|bool
- name: Fail if ceph health is HEALTH_WARN
fail:
msg: Ceph is in {{ ceph_health.stdout }} state.
when:
- ceph_health.stdout == 'HEALTH_WARN'
- fail_on_ceph_health_warn|default(false)|bool
- name: Fail if ceph health is HEALTH_ERR
fail:
msg: Ceph is in {{ ceph_health.stdout }} state.
when:
- ceph_health.stdout == 'HEALTH_ERR'
- fail_on_ceph_health_err|default(true)|bool
- when:
- osd_percentage_min|default(0) > 0
block:
# 1.3+ <= jq <= 1.5 doesn't support try-catch: we rely on if - then -else approach to check if ceph has .osdmap layout
- name: set jq osd percentage filter
set_fact:
jq_osd_percentage_filter: '(if .osdmap != null then { osds: (.osdmap.num_in_osds / .osdmap.num_osds * 100) }
else { osds: (.num_in_osds / .num_osds * 100) } end) | .osds'
- name: Get OSD stat percentage
become: true
shell: >-
"{{ container_client }}" exec "{{ ceph_mon_container.stdout }}" ceph
--cluster "{{ ceph_cluster_name.stdout }}" osd stat -f json | jq '{{ jq_osd_percentage_filter }}'
register: ceph_osd_in_percentage
- name: Fail if there is an unacceptable percentage of in OSDs
fail:
msg: "Only {{ ceph_osd_in_percentage.stdout|float }}% of OSDs are in, but {{ osd_percentage_min|default(0) }}% are required"
when:
- ceph_osd_in_percentage.stdout|float < osd_percentage_min|default(0)