kayobe/ansible/vgpu.yml
Mark Goddard 6c54ce4d3b Introduce max fail percentage to playbooks
This allows us to continue execution until a certain proportion of hosts
fail. This can be useful at scale, where failures are common, and
restarting a deployment is time-consuming.

The default max failure percentage is 100, keeping the default
behaviour. A global max failure percentage may be set via
kayobe_max_fail_percentage, and individual playbooks may define a max
failure percentage via <playbook>_max_fail_percentage.

Related Kolla Ansible patch:
https://review.opendev.org/c/openstack/kolla-ansible/+/805598

Change-Id: Ib81c72b63be5765cca664c38141ffc769640cf07
2024-06-03 16:24:29 +00:00

58 lines
1.3 KiB
YAML

---
# Play: apply the stackhpc.linux.iommu role to hosts in the "iommu" group.
- name: Configure IOMMU
  hosts: iommu
  # Failure threshold for this play. The first defined variable wins, in
  # this order: vgpu_max_fail_percentage,
  # host_configure_max_fail_percentage, kayobe_max_fail_percentage.
  # If none is defined the value is 100.
  max_fail_percentage: >-
    {{ vgpu_max_fail_percentage |
    default(host_configure_max_fail_percentage) |
    default(kayobe_max_fail_percentage) |
    default(100) }}
  tags:
    - iommu
    - vgpu
  tasks:
    - name: Apply the IOMMU role
      import_role:
        name: stackhpc.linux.iommu
  handlers:
    # Fires on notifications to the "reboot" topic (presumably sent by the
    # role above - confirm against the role). It does not reboot the host;
    # it only records the requirement in the kayobe_needs_reboot fact.
    - name: Register that a reboot is required
      set_fact:
        kayobe_needs_reboot: true
      listen: reboot
# Play: apply the stackhpc.linux.vgpu role to hosts in the "vgpu" group.
- name: Configure NVIDIA VGPUs
  hosts: vgpu
  # Failure threshold for this play. The first defined variable wins, in
  # this order: vgpu_max_fail_percentage,
  # host_configure_max_fail_percentage, kayobe_max_fail_percentage.
  # If none is defined the value is 100.
  max_fail_percentage: >-
    {{ vgpu_max_fail_percentage |
    default(host_configure_max_fail_percentage) |
    default(kayobe_max_fail_percentage) |
    default(100) }}
  tags:
    - vgpu
  tasks:
    - name: Apply the vGPU role
      import_role:
        name: stackhpc.linux.vgpu
  handlers:
    # Fires on notifications to the "reboot" topic (presumably sent by the
    # role above - confirm against the role). It does not reboot the host;
    # it only records the requirement in the kayobe_needs_reboot fact.
    - name: Register that a reboot is required
      set_fact:
        kayobe_needs_reboot: true
      listen: reboot
# Play: reboot hosts in the "iommu" or "vgpu" groups, but only those that
# have the kayobe_needs_reboot fact set and where rebooting is enabled.
- name: Reboot when required
  hosts: iommu:vgpu
  # Failure threshold for this play. The first defined variable wins, in
  # this order: vgpu_max_fail_percentage,
  # host_configure_max_fail_percentage, kayobe_max_fail_percentage.
  # If none is defined the value is 100.
  max_fail_percentage: >-
    {{ vgpu_max_fail_percentage |
    default(host_configure_max_fail_percentage) |
    default(kayobe_max_fail_percentage) |
    default(100) }}
  tags:
    - reboot
    - iommu
    - vgpu
  tasks:
    - name: Reboot
      reboot:
        # vgpu_reboot_timeout is not defined in this file; it must be
        # supplied by the inventory or variable files.
        reboot_timeout: "{{ vgpu_reboot_timeout }}"
      become: true
      when:
        # Treated as false when the fact was never set on this host.
        - kayobe_needs_reboot | default(false) | bool
        # No default is applied here, so vgpu_do_reboot must be defined
        # elsewhere.
        - vgpu_do_reboot | bool