6c54ce4d3b
This allows us to continue execution until a certain proportion of hosts fail. This can be useful at scale, where failures are common, and restarting a deployment is time-consuming. The default max failure percentage is 100, keeping the default behaviour. A global max failure percentage may be set via kayobe_max_fail_percentage, and individual playbooks may define a max failure percentage via <playbook>_max_fail_percentage. Related Kolla Ansible patch: https://review.opendev.org/c/openstack/kolla-ansible/+/805598 Change-Id: Ib81c72b63be5765cca664c38141ffc769640cf07
58 lines
1.3 KiB
YAML
58 lines
1.3 KiB
YAML
---
|
|
# Play: apply the stackhpc.linux.iommu role to every host in the `iommu`
# group.
- name: Configure IOMMU
  hosts: iommu
  # Failure tolerance: the first defined value among the playbook-specific,
  # host-configure and global settings is used; when none is set, 100 keeps
  # the default behaviour.
  max_fail_percentage: >-
    {{ vgpu_max_fail_percentage |
       default(host_configure_max_fail_percentage) |
       default(kayobe_max_fail_percentage) |
       default(100) }}
  tags:
    - iommu
    - vgpu
  tasks:
    # Named so that every task and handler in this playbook is labelled.
    - name: Apply the stackhpc.linux.iommu role
      import_role:
        name: stackhpc.linux.iommu
  handlers:
    # Listens on the `reboot` topic and only records that a reboot is needed;
    # the reboot itself is carried out by the "Reboot when required" play.
    # NOTE(review): presumably the role notifies `reboot` - confirm.
    - name: Register that a reboot is required
      set_fact:
        kayobe_needs_reboot: true
      listen: reboot
|
|
|
|
# Play: apply the stackhpc.linux.vgpu role to every host in the `vgpu` group.
- name: Configure NVIDIA VGPUs
  hosts: vgpu
  # Failure tolerance: the first defined value among the playbook-specific,
  # host-configure and global settings is used; when none is set, 100 keeps
  # the default behaviour.
  max_fail_percentage: >-
    {{ vgpu_max_fail_percentage |
       default(host_configure_max_fail_percentage) |
       default(kayobe_max_fail_percentage) |
       default(100) }}
  tags:
    - vgpu
  tasks:
    # Named so that every task and handler in this playbook is labelled.
    - name: Apply the stackhpc.linux.vgpu role
      import_role:
        name: stackhpc.linux.vgpu
  handlers:
    # Listens on the `reboot` topic and only records that a reboot is needed;
    # the reboot itself is carried out by the "Reboot when required" play.
    # NOTE(review): presumably the role notifies `reboot` - confirm.
    - name: Register that a reboot is required
      set_fact:
        kayobe_needs_reboot: true
      listen: reboot
|
|
|
|
# Play: reboot hosts in the `iommu` and `vgpu` groups for which the
# `kayobe_needs_reboot` fact has been set.
- name: Reboot when required
  hosts: iommu:vgpu
  # Same failure-tolerance chain as the configuration plays in this file:
  # first defined value wins, falling back to 100.
  max_fail_percentage: >-
    {{ vgpu_max_fail_percentage |
       default(host_configure_max_fail_percentage) |
       default(kayobe_max_fail_percentage) |
       default(100) }}
  tags:
    - reboot
    - iommu
    - vgpu
  tasks:
    - name: Reboot
      become: true
      # Both conditions must hold: a reboot was flagged (treated as false
      # when the fact is absent) and rebooting is enabled.
      # NOTE(review): vgpu_do_reboot and vgpu_reboot_timeout are not defined
      # in this file - presumably supplied by inventory/group vars; confirm.
      when:
        - kayobe_needs_reboot | default(false) | bool
        - vgpu_do_reboot | bool
      reboot:
        reboot_timeout: "{{ vgpu_reboot_timeout }}"
|