kayobe/ansible/overcloud-bios-raid.yml
Mark Goddard 6c54ce4d3b Introduce max fail percentage to playbooks
This allows us to continue execution until a certain proportion of hosts
fail. This can be useful at scale, where failures are common, and
restarting a deployment is time-consuming.

The default max failure percentage is 100, keeping the default
behaviour. A global max failure percentage may be set via
kayobe_max_fail_percentage, and individual playbooks may define a max
failure percentage via <playbook>_max_fail_percentage.

Related Kolla Ansible patch:
https://review.opendev.org/c/openstack/kolla-ansible/+/805598

Change-Id: Ib81c72b63be5765cca664c38141ffc769640cf07
2024-06-03 16:24:29 +00:00

133 lines
4.7 KiB
YAML

---
# Perform configuration of the overcloud nodes' BIOS and RAID. Nodes should be
# registered with the seed's ironic service but not active. The BIOS and RAID
# is configured out of band using the ansible stackhpc.drac role. As such, only
# Dell servers with iDRACs are currently supported. During configuration, we
# set the ironic nodes' to maintenance mode to prevent ironic from managing
# their power states.
- name: Group overcloud nodes by their BMC type
hosts: overcloud
gather_facts: no
max_fail_percentage: >-
{{ overcloud_bios_raid_max_fail_percentage |
default(kayobe_max_fail_percentage) |
default(100) }}
tags:
- bios
- raid
vars:
# List of BMC types supporting BIOS and RAID configuration.
supported_bmc_types:
- idrac
tasks:
- name: Fail if node has BIOS and/or RAID configuration and BMC type is not supported
fail:
msg: >
Node has BIOS and/or RAID configuration but BMC type
{% if bmc_type is undefined %}is not defined{% else %}{{ bmc_type }}
is not supported{% endif %}.
when:
- bios_config or raid_config
- bmc_type is undefined or bmc_type not in supported_bmc_types
- name: Group overcloud hosts by their BMC type
group_by:
key: "overcloud_with_bmcs_of_type_{{ bmc_type | default('unknown') }}"
changed_when: false
- name: Check whether any changes to nodes' BIOS and RAID configuration are required
hosts: overcloud_with_bmcs_of_type_idrac
gather_facts: no
max_fail_percentage: >-
{{ overcloud_bios_raid_max_fail_percentage |
default(kayobe_max_fail_percentage) |
default(100) }}
tags:
- bios
- raid
vars:
# Set this to False to avoid rebooting the nodes after configuration.
drac_reboot: True
roles:
- role: stackhpc.drac
drac_address: "{{ ipmi_address }}"
drac_username: "{{ ipmi_username }}"
drac_password: "{{ ipmi_password }}"
drac_bios_config: "{{ bios_config }}"
drac_raid_config: "{{ raid_config }}"
drac_check_mode: True
tasks:
- name: Set a fact about whether the configuration changed
set_fact:
bios_or_raid_change: "{{ drac_result is changed }}"
- name: Ensure that overcloud BIOS and RAID volumes are configured
hosts: overcloud_with_bmcs_of_type_idrac
gather_facts: no
max_fail_percentage: >-
{{ overcloud_bios_raid_max_fail_percentage |
default(kayobe_max_fail_percentage) |
default(100) }}
tags:
- bios
- raid
vars:
# Set this to False to avoid rebooting the nodes after configuration.
drac_reboot: True
seed_host: "{{ groups['seed'][0] }}"
pre_tasks:
- name: Set the overcloud nodes' maintenance mode
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node maintenance set {% raw %}{{ inventory_hostname }}{% endraw %} --reason BIOS-RAID"'
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
when: bios_or_raid_change | bool
roles:
- role: stackhpc.drac
drac_address: "{{ ipmi_address }}"
drac_username: "{{ ipmi_username }}"
drac_password: "{{ ipmi_password }}"
drac_bios_config: "{{ bios_config }}"
drac_raid_config: "{{ raid_config }}"
when: bios_or_raid_change | bool
tasks:
- name: Unset the overcloud nodes' maintenance mode
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node maintenance unset {% raw %}{{ inventory_hostname }}{% endraw %}"'
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
when: bios_or_raid_change | bool