kayobe/ansible/overcloud-etc-hosts-fixup.yml
Mark Goddard 6c54ce4d3b Introduce max fail percentage to playbooks
This allows us to continue execution until a certain proportion of hosts
fail. This can be useful at scale, where failures are common, and
restarting a deployment is time-consuming.

The default max failure percentage is 100, keeping the default
behaviour. A global max failure percentage may be set via
kayobe_max_fail_percentage, and individual playbooks may define a max
failure percentage via <playbook>_max_fail_percentage.

Related Kolla Ansible patch:
https://review.opendev.org/c/openstack/kolla-ansible/+/805598

Change-Id: Ib81c72b63be5765cca664c38141ffc769640cf07
2024-06-03 16:24:29 +00:00

74 lines
2.8 KiB
YAML

---
# For some currently unknown reason, overcloud hosts end up with multiple
# entries in /etc/hosts that map their own hostname to their admin
# network IP address, in addition to one that maps their own hostname to their
# internal network IP address. This causes RabbitMQ upgrades to fail, as
# RabbitMQ expects the system's hostname to resolve to the IP address on
# which it is listening. As a workaround, we remove the stale entries from
# /etc/hosts. See https://github.com/stackhpc/kayobe/issues/14.
- name: Ensure overcloud hosts' /etc/hosts does not contain incorrect IPs
hosts: overcloud
max_fail_percentage: >-
{{ overcloud_etc_hosts_fixup_max_fail_percentage |
default(kayobe_max_fail_percentage) |
default(100) }}
tags:
- etc-hosts-fixup
tasks:
# Remove any entries from /etc/hosts that map the current hostname to an IP
# other than the host's IP on the internal API network.
- name: Ensure overcloud hosts' /etc/hosts does not contain incorrect IPs
lineinfile:
dest: /etc/hosts
regexp: "^(?!{{ internal_net_name | net_ip | regex_escape }})[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+[ \t]*{{ ansible_facts.hostname }}"
state: absent
# Ensure that the correct entry is present.
validate: "grep -E '^({{ internal_net_name | net_ip | regex_escape }}).*{{ ansible_facts.hostname }}' %s"
become: True
- name: Ensure rabbitmq containers' /etc/hosts does not contain incorrect IPs
hosts: overcloud
max_fail_percentage: >-
{{ overcloud_etc_hosts_fixup_max_fail_percentage |
default(kayobe_max_fail_percentage) |
default(100) }}
tags:
- etc-hosts-fixup
vars:
rabbitmq_containers:
- rabbitmq
tasks:
- name: Check whether rabbitmq container is running
command: docker inspect -f {{ '{{.Id}}' }} {{ item }}
changed_when: False
failed_when: False
with_items: "{{ rabbitmq_containers }}"
register: ps_result
- name: Copy /etc/hosts into rabbitmq containers
command: docker cp /etc/hosts {{ item.item }}:/tmp/hosts
with_items: "{{ ps_result.results }}"
when: item.rc == 0
changed_when: false
- name: Ensure rabbitmq containers' /etc/hosts does not contain incorrect IPs
command: >
docker exec -u root {{ item.item }}
bash -c
'if ! diff -q /tmp/hosts /etc/hosts >/dev/null; then
cp /tmp/hosts /etc/hosts &&
echo changed
fi &&
rm /tmp/hosts'
changed_when: "'changed' in sed_result.stdout"
with_items: "{{ ps_result.results }}"
when: item.rc == 0
register: sed_result
- name: Check that RabbitMQ client works
command: docker exec {{ item.item }} rabbitmqctl status
with_items: "{{ ps_result.results }}"
when: item.rc == 0
changed_when: false