Add a health check for overcloud nodes
This adds a basic connection check to verify that nodes can be connected to prior to an upgrade or a scale-up. The validation uses data from Nova and Ironic to display more detailed information about the failing nodes. Change-Id: I3b098fd2aa169397e73757fb44941011c9206678
This commit is contained in:
parent
d66f8069b6
commit
a0c06ae727
@ -0,0 +1,6 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds a node health validation. The validation tries to ping all overcloud
|
||||
nodes. For those that can't be reached, some information is collected from
|
||||
Nova and Ironic to display in the validation output.
|
35
validations/node-health.yaml
Normal file
35
validations/node-health.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
---
# Node health validation.
#
# Runs on the `undercloud` host. Every host in the `overcloud` inventory group
# is pinged; for the hosts that do not answer, Nova and Ironic are queried so
# that the failure message can include details about the affected nodes.
- hosts: undercloud
  vars:
    metadata:
      name: Node health check
      description: >
        Check if all overcloud nodes can be connected to before starting a
        scale-up or an upgrade.
      groups:
        - pre-upgrade
  tasks:
    # Ping failures are deliberately ignored here so that every host is probed;
    # the outcome is evaluated by the final task instead.
    - name: Ping all overcloud nodes
      icmp_ping:
        host: "{{ item }}"
      with_items: "{{ groups.overcloud }}"
      ignore_errors: true
      register: ping_results

    # Keep only the loop items whose ping result is flagged as failed.
    - name: Extract failed pings
      set_fact:
        failed_ips: "{{ ping_results.results | selectattr('failed', 'equalto', True) | map(attribute='item') | list }}"

    # NOTE(review): presumably matches Nova servers by their `ctlplane` IP;
    # confirm against the nova_servers lookup plugin.
    - name: Lookup nova servers for each failed IP
      set_fact:
        servers: "{{ lookup('nova_servers', 'ip', 'ctlplane', failed_ips, wantlist=True) }}"

    - name: Extract nova ids
      set_fact:
        server_ids: "{{ servers | map(attribute='id') | list }}"

    # NOTE(review): presumably returns the Ironic nodes whose `instance_uuid`
    # is one of the collected server ids; confirm against the ironic_nodes
    # lookup plugin.
    - name: Lookup ironic nodes for unreachable nova servers
      set_fact:
        nodes: "{{ lookup('ironic_nodes', 'instance_uuid', server_ids, wantlist=True) }}"

    # Fail whenever at least one ping failed. Gating on `failed_ips` rather
    # than on `nodes` ensures an unreachable host is still reported when no
    # Nova server or Ironic node could be matched to it. The detailed node
    # report is rendered only when there are nodes to describe.
    - name: Fail if there are unreachable nodes
      fail:
        msg: |
          Unreachable overcloud hosts: {{ failed_ips | join(', ') }}

          {{ lookup('template', './templates/unreachable_nodes.j2',
                    template_vars=dict(nodes=nodes))
             if nodes | length > 0
             else 'No matching Ironic nodes were found for these hosts.' }}
      when: failed_ips | length > 0
|
9
validations/templates/unreachable_nodes.j2
Normal file
9
validations/templates/unreachable_nodes.j2
Normal file
@ -0,0 +1,9 @@
|
||||
{#- Failure report: one entry per unreachable node in `nodes`, showing the
    name, uuid, instance_uuid, last_error and power_state of each.
    Whitespace control on this comment keeps it from adding output. -#}
The following nodes could not be reached ({{ nodes|length }} node{{ '' if nodes|length == 1 else 's' }}):

{% for node in nodes %}
  * {{ node.name }}
    UUID: {{ node.uuid }}
    Instance: {{ node.instance_uuid }}
    Last Error: {{ node.last_error }}
    Power State: {{ node.power_state }}
{% endfor %}
|
Loading…
Reference in New Issue
Block a user