kayobe/ansible/overcloud-provision.yml
Pierre Riteau 3e03884c47 Speed up calls to Bifrost
For overcloud commands involving Bifrost (e.g. overcloud provision), we
use the Bifrost dynamic inventory. This runs bifrost_inventory.py which
gathers node and port information for all bare metal hosts. Because this
gets executed for each node in our inventory, it produces a large number
of API requests to Ironic, which grows as the number of hosts increases.

Halve the number of calls using the BIFROST_NODE_NAMES environment
variable, which restricts fetching port information to just the node
being queried. bifrost_inventory.py still performs one call for each
node, which must be fixed separately in Bifrost [1].

Time to query overcloud nodes' hardware introspection data for 44 hosts:

* before patch: 823 seconds
* after patch: 415 seconds

With the patch to Bifrost [1], this is further reduced to 46 seconds.

[1] https://review.opendev.org/c/openstack/bifrost/+/882950

Change-Id: I341075115442a38c327e3ade74b81b1b9e7e85c6
2023-08-29 16:53:49 +02:00

222 lines
8.4 KiB
YAML

---
# Use bifrost to provision the overcloud nodes with a base OS.
- name: Ensure the overcloud nodes are provisioned
hosts: overcloud
tags:
- provision
vars:
wait_manageable_timeout: 60
wait_available_timeout: 60
# Set to False to avoid waiting for the nodes to become active.
wait_active: True
wait_active_timeout: 600
wait_active_interval: 10
# Set to False to avoid waiting for the nodes to be accessible via
# SSH.
wait_ssh: True
wait_ssh_timeout: 600
# List of states from which we can get to active.
provisionable_states:
- enroll
- manageable
- available
- active
# List of valid states while a node is being provisioned.
deploying_states:
# The API is asynchronous, so allow the initial state.
- available
- deploying
- wait call-back
# List of hosts to limit Bifrost deploy-dynamic.yaml playbook to.
bifrost_limit: ['localhost']
# Retries to use when using Ironic API and hitting node locked errors.
ironic_retries: 6
ironic_retry_interval: 5
seed_host: "{{ groups['seed'][0] }}"
gather_facts: no
tasks:
- name: Check the ironic node's initial provision state
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export OS_BAREMETAL_API_VERSION=1.34 &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node show {% raw %}{{ inventory_hostname }}{% endraw %} -f value -c provision_state"'
register: show_result
changed_when: False
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Set a fact containing the ironic node's initial provision state
set_fact:
initial_provision_state: "{{ show_result.stdout_lines[1] }}"
- name: Fail if the ironic node is in an unexpected provision state
fail:
msg: >
Ironic node for {{ inventory_hostname }} is in an unexpected
initial provision state: {{ initial_provision_state }}. Expected
states are: {{ provisionable_states | join(',') }}.
when: initial_provision_state not in provisionable_states
- name: Ensure the ironic node is manageable
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal -vvvv
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node manage --wait {{ wait_manageable_timeout }} {% raw %}{{ inventory_hostname }}{% endraw %}"'
register: manage_result
until: manage_result is successful or 'is locked by host' in manage_result.stdout
retries: "{{ ironic_retries }}"
delay: "{{ ironic_retry_interval }}"
when: initial_provision_state == 'enroll'
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Ensure the ironic node is available
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal -vvvv
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node provide --wait {{ wait_available_timeout }} {% raw %}{{ inventory_hostname }}{% endraw %}"'
register: provide_result
until: >-
provide_result is successful or
'is locked by host' in provide_result.stdout or
'while it is in state "available"' in provide_result.stdout
failed_when:
- provide_result is not successful
# If fast-track deployment is enabled, it's possible that ironic will
# move the node to available, even though the initial state was
# manageable.
- "'while it is in state \"available\"' not in provide_result.stdout"
retries: "{{ ironic_retries }}"
delay: "{{ ironic_retry_interval }}"
when: initial_provision_state in ['enroll', 'manageable']
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Set a fact containing the bifrost host list
set_fact:
bifrost_limit: "{{ bifrost_limit + [item] }}"
with_items: "{{ play_hosts }}"
when: hostvars[item].initial_provision_state != 'active'
run_once: True
- name: Ensure the ironic nodes are provisioned
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
ansible-playbook -vvvv
/bifrost/playbooks/deploy-dynamic.yaml
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ bifrost_limit | join(':') }}'
when: bifrost_limit
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
# We execute this only once, allowing the Bifrost Ansible to handle
# multiple nodes.
run_once: True
- name: Wait for the ironic node to become active
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export OS_BAREMETAL_API_VERSION=1.34 &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node show {% raw %}{{ inventory_hostname }}{% endraw %} -f value -c provision_state"'
register: show_result
# Wait until the node is no longer in one of the deploying states.
until: not show_result.stdout_lines[1:] | intersect(deploying_states)
retries: "{{ wait_active_timeout // wait_active_interval }}"
delay: "{{ wait_active_interval }}"
when:
- wait_active | bool
- initial_provision_state != 'active'
changed_when: False
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Set a fact containing the final provision state
set_fact:
final_provision_state: "{{ show_result.stdout_lines[1] }}"
when:
- wait_active | bool
- initial_provision_state != 'active'
- name: Fail if any of the nodes are not available
fail:
msg: >
Ironic node for {{ inventory_hostname }} is in an unexpected
provision state after provisioning. Ironic provision state:
{{ final_provision_state }}. Expected: active.
when:
- wait_active | bool
- initial_provision_state != 'active'
- final_provision_state != 'active'
- name: Wait for SSH access to the nodes
local_action:
module: wait_for
host: "{{ ansible_host }}"
port: 22
state: started
timeout: "{{ wait_ssh_timeout }}"
when: wait_ssh | bool