diff --git a/ansible/dell-compute-node-discovery.yml b/ansible/dell-compute-node-discovery.yml
new file mode 100644
index 000000000..289b9274e
--- /dev/null
+++ b/ansible/dell-compute-node-discovery.yml
@@ -0,0 +1,44 @@
+---
+- name: Ensure compute nodes are present in the Ansible inventory
+  hosts: config-mgmt
+  gather_facts: false
+  vars:
+    # Set this to a colon-separated list of compute node hostnames on which to
+    # trigger discovery. If unset, all compute nodes will be triggered.
+    compute_node_limit: ""
+    compute_node_limit_list: "{{ compute_node_limit.split(':') }}"
+  tasks:
+    - name: Add hosts for the compute nodes
+      add_host:
+        name: "{{ item.key }}"
+        groups: compute
+      with_dict: "{{ idrac_network_ips }}"
+      # Skip hosts that already exist or are excluded by compute_node_limit.
+      when:
+        - "item.key not in groups['all']"
+        - "item.key | replace('-idrac', '') not in groups['all']"
+        - "not compute_node_limit or item.key | replace('-idrac', '') in compute_node_limit_list"
+      run_once: true
+
+- name: Ensure compute nodes are PXE booted
+  hosts: compute
+  gather_facts: false
+  vars:
+    # Set this to the index of the interface on which to enable PXE.
+    drac_pxe_interface: 1
+    # Set this to a list of indices of all interfaces.
+    drac_pxe_all_interfaces: [1, 2, 3, 4]
+  tasks:
+    - name: Set a fact containing the compute node IPMI address
+      set_fact:
+        ansible_host: "{{ idrac_network_ips[inventory_hostname] }}"
+        ansible_user: "{{ ipmi_username }}"
+        ansible_ssh_pass: "{{ ipmi_password }}"
+
+    - name: Ensure the compute node's BMC is added as a known host
+      include_role:
+        name: ssh-known-host
+
+    - name: Ensure the compute node's BMC is set to PXE boot
+      include_role:
+        name: drac-pxe
diff --git a/ansible/roles/drac-pxe/defaults/main.yml b/ansible/roles/drac-pxe/defaults/main.yml
new file mode 100644
index 000000000..37dccf54a
--- /dev/null
+++ b/ansible/roles/drac-pxe/defaults/main.yml
@@ -0,0 +1,15 @@
+---
+# Index of interface on which to enable PXE booting.
+drac_pxe_interface: 1
+
+# List of numerical indices of all interfaces.
+drac_pxe_all_interfaces: []
+
+# Timeout in seconds when waiting for the BIOS configuration job to complete.
+drac_pxe_timeout: 600
+
+# Number of times to attempt to perform write operations.
+drac_pxe_retries: 5
+
+# Interval in seconds between retries and between job status polls.
+drac_pxe_interval: 5
diff --git a/ansible/roles/drac-pxe/tasks/main.yml b/ansible/roles/drac-pxe/tasks/main.yml
new file mode 100644
index 000000000..098c6d133
--- /dev/null
+++ b/ansible/roles/drac-pxe/tasks/main.yml
@@ -0,0 +1,101 @@
+---
+- name: Check the boot mode
+  raw: "racadm get BIOS.BiosBootSettings.BootMode"
+  register: result
+  failed_when: "'ERROR' in result.stdout"
+  changed_when: false
+
+- name: Set a fact containing the boot mode
+  set_fact:
+    # Format of last line is:
+    # BootMode=VALUE[ (Pending Value=VALUE)]
+    # Only an exact match is accepted, so a line carrying a pending value
+    # sets neither fact to true and the following task fails.
+    boot_mode_is_uefi: "{{ result.stdout_lines[-1] == 'BootMode=Uefi' }}"
+    boot_mode_is_bios: "{{ result.stdout_lines[-1] == 'BootMode=Bios' }}"
+
+- name: Fail if unable to determine the boot mode
+  fail:
+    msg: >
+      Unable to determine the boot mode. Got: {{ result.stdout }}. Expected
+      bios or uefi.
+  when:
+    - not boot_mode_is_bios | bool
+    - not boot_mode_is_uefi | bool
+
+# Write operations are retried while the DRAC output contains
+# drac_pxe_busy_message; output containing ERROR marks the task as failed.
+- block:
+    - name: Ensure NIC boot protocol is configured
+      raw: "racadm set Nic.NICConfig.{{ item }}.LegacyBootProto {% if item == drac_pxe_interface %}PXE{% else %}NONE{% endif %}"
+      with_items: "{{ drac_pxe_all_interfaces }}"
+      register: result
+      failed_when: "'ERROR' in result.stdout"
+      until: drac_pxe_busy_message not in result.stdout
+      retries: "{{ drac_pxe_retries }}"
+      delay: "{{ drac_pxe_interval }}"
+
+    - name: Ensure NIC configuration jobs are created
+      raw: "racadm jobqueue create NIC.Integrated.1-{{ item }}-1 -s TIME_NOW"
+      with_items: "{{ drac_pxe_all_interfaces }}"
+      register: result
+      failed_when: "'ERROR' in result.stdout"
+      until: drac_pxe_busy_message not in result.stdout
+      retries: "{{ drac_pxe_retries }}"
+      delay: "{{ drac_pxe_interval }}"
+
+    - name: Ensure BIOS boot sequence is configured
+      raw: "racadm set BIOS.BiosBootSettings.bootseq NIC.Integrated.1-{{ drac_pxe_interface }}-1,HardDisk.List.1-1"
+      register: result
+      failed_when: "'ERROR' in result.stdout"
+      until: drac_pxe_busy_message not in result.stdout
+      retries: "{{ drac_pxe_retries }}"
+      delay: "{{ drac_pxe_interval }}"
+
+  when: boot_mode_is_bios | bool
+
+- block:
+    - name: Ensure UEFI PXE device is configured
+      raw: "racadm set BIOS.PxeDev1Settings.PxeDev1Interface NIC.Integrated.1-{{ drac_pxe_interface }}-1"
+      register: result
+      failed_when: "'ERROR' in result.stdout"
+      until: drac_pxe_busy_message not in result.stdout
+      retries: "{{ drac_pxe_retries }}"
+      delay: "{{ drac_pxe_interval }}"
+
+    - name: Ensure UEFI boot sequence is configured
+      raw: "racadm set BIOS.BiosBootSettings.UefiBootSeq NIC.PxeDevice.1-1,RAID.Integrated.1-1"
+      register: result
+      failed_when: "'ERROR' in result.stdout"
+      until: drac_pxe_busy_message not in result.stdout
+      retries: "{{ drac_pxe_retries }}"
+      delay: "{{ drac_pxe_interval }}"
+
+  when: boot_mode_is_uefi | bool
+
+- name: Ensure BIOS configuration job is created
+  raw: "racadm jobqueue create BIOS.Setup.1-1 -s TIME_NOW"
+  register: result
+  failed_when: "'ERROR' in result.stdout"
+  until: drac_pxe_busy_message not in result.stdout
+  retries: "{{ drac_pxe_retries }}"
+  delay: "{{ drac_pxe_interval }}"
+
+- name: Set a fact containing the BIOS configuration job ID
+  set_fact:
+    # Format of the last line is:
+    # JOB_ID = VALUE
+    bios_job_id: "{{ result.stdout_lines[-1].split()[-1] }}"
+
+- name: Ensure server is rebooted
+  raw: "racadm serveraction powercycle"
+  register: result
+  failed_when: "'ERROR' in result.stdout"
+
+- name: Wait for the BIOS configuration job to complete
+  raw: "racadm jobqueue view -i {{ bios_job_id }}"
+  register: result
+  failed_when: "'ERROR' in result.stdout"
+  until: "'Status=Completed' in result.stdout"
+  retries: "{{ drac_pxe_timeout // drac_pxe_interval }}"
+  delay: "{{ drac_pxe_interval }}"
diff --git a/ansible/roles/drac-pxe/vars/main.yml b/ansible/roles/drac-pxe/vars/main.yml
new file mode 100644
index 000000000..aad95e6eb
--- /dev/null
+++ b/ansible/roles/drac-pxe/vars/main.yml
@@ -0,0 +1,3 @@
+---
+# Message emitted by the DRAC which can be retried.
+drac_pxe_busy_message: "ERROR: Lifecycle Controller is currently in use."