Merge "Refactoring node-health validation"
This commit is contained in:
commit
54aa994870
@ -4,3 +4,4 @@ collections:
|
||||
- community.general
|
||||
- community.crypto
|
||||
- ansible.posix
|
||||
- openstack.cloud
|
||||
|
@ -2,5 +2,12 @@
|
||||
node_health
|
||||
===========
|
||||
|
||||
This role is used by the :ref:`pre-upgrade_node-health` validation to verify the state of the overcloud
|
||||
compute services and the baremetal nodes they are running on.
|
||||
|
||||
As the clients contacted require Keystone authentication, the role requires
|
||||
relevant values, such as Keystone endpoint and username, for correct operation.
|
||||
Otherwise it will produce an authentication error.
|
||||
|
||||
.. ansibleautoplugin::
|
||||
:role: roles/node_health
|
||||
|
@ -1,11 +1,12 @@
|
||||
---
|
||||
- hosts: undercloud
|
||||
- hosts: localhost
|
||||
vars:
|
||||
metadata:
|
||||
name: Node health check
|
||||
description: |
|
||||
Check if all overcloud nodes can be connected to before starting a
|
||||
scale-up or an upgrade.
|
||||
scale-up or an upgrade. Validation requires cloud authentication details
|
||||
in the form of an accessible clouds.yaml file to be correctly executed.
|
||||
groups:
|
||||
- pre-upgrade
|
||||
categories:
|
||||
|
@ -1,35 +1,54 @@
|
||||
---
|
||||
- name: Collect IPs for allovercloud nodes
|
||||
set_fact: ansible_host="{{ hostvars[item]['ansible_host'] }}"
|
||||
register: oc_ips
|
||||
with_items: "{{ groups.allovercloud }}"
|
||||
|
||||
- name: Ping all overcloud nodes
|
||||
icmp_ping:
|
||||
host: "{{ item }}"
|
||||
with_items: "{{ oc_ips.results | map(attribute='ansible_facts.ansible_host') | list }}"
|
||||
- name: Retrieving compute services
|
||||
ignore_errors: true
|
||||
register: ping_results
|
||||
openstack.cloud.compute_service_info:
|
||||
cloud: overcloud
|
||||
register: result
|
||||
|
||||
- name: Extract failed pings
|
||||
set_fact:
|
||||
failed_ips: "{{ ping_results.results | selectattr('failed', 'equalto', True) | map(attribute='item') | list }}"
|
||||
|
||||
- name: Lookup nova servers for each failed IP
|
||||
set_fact:
|
||||
servers: "{{ lookup('nova_servers', 'ip', 'ctlplane', failed_ips, wantlist=True) }}"
|
||||
|
||||
- name: Extract nova ids
|
||||
set_fact:
|
||||
server_ids: "{{ servers | map(attribute='id') | list }}"
|
||||
|
||||
- name: Lookup ironic nodes for unreachable nova servers
|
||||
set_fact:
|
||||
nodes: "{{ lookup('ironic_nodes', 'instance_uuid', server_ids, wantlist=True) }}"
|
||||
|
||||
- name: Fail if there are unreachable nodes
|
||||
- name: Fail if the compute services can't be queried
|
||||
fail:
|
||||
msg: |
|
||||
{{ lookup('template', './templates/unreachable_nodes.j2',
|
||||
template_vars=dict(nodes=nodes)) }}
|
||||
when: nodes|length > 0
|
||||
msg: Compute services query failed with {{ result.msg }}
|
||||
when: result.failed
|
||||
|
||||
- name: Get nova nodes
|
||||
set_fact:
|
||||
nova_nodes: "{{ result.openstack_compute_services | community.general.json_query(query) }}"
|
||||
vars:
|
||||
query: "[?contains(name, 'nova')]"
|
||||
|
||||
- name: Get failed nova nodes
|
||||
set_fact:
|
||||
failed_nodes: "{{ nova_nodes | community.general.json_query(failed_nodes_query) }}"
|
||||
vars:
|
||||
failed_nodes_query: "[?state!='up']"
|
||||
|
||||
- when: failed_nodes | length > 0
|
||||
block:
|
||||
- name: Get baremetal nodes info
|
||||
become: true
|
||||
openstack.cloud.baremetal_node_info:
|
||||
cloud: undercloud
|
||||
register: result
|
||||
|
||||
- name: Get baremetal nodes
|
||||
set_fact:
|
||||
baremetal_nodes: "{{ result.baremetal_nodes }}"
|
||||
|
||||
- name: Get failed node names
|
||||
set_fact:
|
||||
node_names: "{{ item.host.split('.')[0]}}"
|
||||
with_items: "{{ failed_nodes }}"
|
||||
|
||||
- name: Get failed baremetal nodes
|
||||
set_fact:
|
||||
failed_baremetal_nodes: "{{ baremetal_nodes | to_json | from_json | community.general.json_query(query) }}"
|
||||
with_items: "{{ node_names }}"
|
||||
vars:
|
||||
query: "[?contains(name, '{{ item }}')]"
|
||||
|
||||
- name: Fail if there are unreachable nodes
|
||||
fail:
|
||||
msg: |
|
||||
{{ lookup('template', './templates/unreachable_nodes.j2',
|
||||
template_vars=dict(nodes=failed_baremetal_nodes)) }}
|
||||
when: failed_baremetal_nodes|length > 0
|
||||
|
Loading…
Reference in New Issue
Block a user