Merge "enroll-init: reconfiguration flexibility"
This commit is contained in:
commit
7c511a7cd5
73
playbookconfig/src/playbooks/enroll_init.yml
Normal file
73
playbookconfig/src/playbooks/enroll_init.yml
Normal file
@@ -0,0 +1,73 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2024 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This playbook executes the enroll-init stage of subcloud enrollment,
|
||||
# using cloud-init on the target system and rvmc to insert a seed image.
|
||||
# Overall, this triggers OAM network and password updates required for subsequent
|
||||
# enrollment stages.
|
||||
#
|
||||
|
||||
- name: Enroll Init Playbook
  hosts: all
  gather_facts: false
  become: false

  tasks:
    # Resolve the tunables used by the tasks below, falling back to the
    # enroll-init defaults when the caller does not override them.
    - name: Set enroll-init facts
      set_fact:
        sysinv_port: "{{ sysinv_port | default(6385) }}"
        boot_wait_time: "{{ enroll_boot_wait_time | default(150) }}"
        wait_for_timeout: "{{ enroll_wait_for_timeout | default(900) }}"
        # Random value in the range 0-119.
        job_retry_delay: "{{ 120 | random }}"
        protocol: "{{ protocol | default('https') }}"
        operation_string: "enroll-init"

    - name: Run RVMC script to insert seed image and power on host
      include_role:
        name: common/rvmc

    - name: Report that enroll-init is in progress
      debug:
        msg: "Waiting for the system to enroll init..."

    # Wait for the sysinv API port to open on the reconfigured OAM address,
    # ensuring that endpoints are reconfigured with the new address.
    # The check runs on the Ansible controller, not on the target host.
    - name: Waiting {{ wait_for_timeout }} seconds for port {{ sysinv_port }} become open on {{ enroll_reconfigured_oam }}
      wait_for:
        port: "{{ sysinv_port }}"
        host: "{{ enroll_reconfigured_oam }}"
        delay: "{{ boot_wait_time }}"
        timeout: "{{ wait_for_timeout }}"
        state: started
        msg: "Timeout waiting for {{ enroll_reconfigured_oam }}:{{ sysinv_port }}. Err_code=wait_enroll_init"
      delegate_to: localhost

    # The seed ISO must be ejected to prevent it from being reapplied on
    # reboot. This is necessary because cloud-config is set to run always and
    # cloud-init services remain enabled until enrollment is complete
    # (allowing for the possibility of re-running enroll-init if needed).
    - name: Run RVMC script to eject image
      include_role:
        name: common/rvmc
      vars:
        eject_image_only: true

    # During the OAM update, several async operations may trigger one another.
    # Although the endpoints are reconfigured by now, the API and certs may
    # not be fully updated. A simple curl request can be used to verify both,
    # specifically checking the region_id API, which will be needed shortly
    # after this playbook completes.
    - name: Wait for the sysinv API to be ready and for certs to be updated for the reconfigured OAM endpoint
      # The URL is quoted and --globoff is passed so that a bracketed IPv6
      # literal is not treated as a glob by the shell or by curl.
      shell: |
        curl -s --globoff -o /dev/null -w '%{http_code}' \
          "{{ protocol }}://{{ oam_url_host }}:{{ sysinv_port }}/v1/isystems/region_id"
      vars:
        # An IPv6 literal must be wrapped in brackets inside a URL; IPv4
        # addresses, hostnames and already-bracketed values pass through as-is.
        oam_url_host: >-
          {{ ('[' ~ enroll_reconfigured_oam ~ ']')
          if (':' in enroll_reconfigured_oam | string
          and not (enroll_reconfigured_oam | string).startswith('['))
          else enroll_reconfigured_oam }}
      register: api_response
      # Poll up to 30 times, 20 seconds apart, until the API answers HTTP 200.
      retries: 30
      delay: 20
      until: api_response.stdout == "200"
      delegate_to: localhost
      args:
        # Disable warning that suggests using the get_url and uri module:
        # - get_url is unnecessary as we're not actually downloading.
        # - uri module doesn't seem to work for our cert update check
        # NOTE(review): the `warn` argument is not accepted by newer
        # ansible-core releases - confirm against the deployed Ansible version.
        warn: false
|
@@ -16,104 +16,14 @@
|
||||
boot_wait_time: "{{ boot_wait_time | default(600) }}"
|
||||
wait_for_timeout: "{{ wait_for_timeout | default(3600) }}"
|
||||
job_retry_delay: "{{ 120 | random }}"
|
||||
operation_string: "install"
|
||||
host_to_check: "{{ ansible_host }}"
|
||||
enroll_init: "{{ enroll_reconfigured_oam is defined }}"
|
||||
|
||||
- name: Set facts for enroll_init
|
||||
set_fact:
|
||||
# ansible_port is waited on for an open connection,
|
||||
# set it to sysinv_api_port to ensure that endpoints have
|
||||
# reconfigured before marking the playbook as complete.
|
||||
ansible_port: 6385
|
||||
operation_string: "enroll-init"
|
||||
host_to_check: "{{ enroll_reconfigured_oam }}"
|
||||
boot_wait_time: "{{ enroll_boot_wait_time | default(150) }}"
|
||||
wait_for_timeout: "{{ enroll_wait_for_timeout | default(900) }}"
|
||||
when: enroll_init
|
||||
|
||||
# The following block is executed locally
|
||||
- block:
|
||||
- name: Run rvmc script
|
||||
script: >
|
||||
/usr/local/bin/rvmc_install.py
|
||||
--debug={{ rvmc_debug_level | default(0) }}
|
||||
--subcloud_name="{{ inventory_hostname }}"
|
||||
--config_file="{{ rvmc_config_file }}"
|
||||
# Script return code:
|
||||
# 0 - Success
|
||||
# 1 - Retryable failures
|
||||
# 2 - Non-retryable failures
|
||||
# (eg. Invalid credentials, Script execution timeout in 30 minutes,
|
||||
# Failed to terminate the previous process.)
|
||||
# Refer to rvmc_install.py in the distributedcloud repository for details.
|
||||
until: script_result.rc != 1
|
||||
register: script_result
|
||||
retries: 1
|
||||
delay: "{{ job_retry_delay }}"
|
||||
failed_when: false
|
||||
|
||||
- name: Display script output
|
||||
debug:
|
||||
msg: "{{ script_result.stdout }}"
|
||||
|
||||
# Block to isolate failure message
|
||||
- block:
|
||||
- name: Set credential failure flag
|
||||
set_fact:
|
||||
failure_msg: >-
|
||||
Credential failure.
|
||||
Action: Check BMC username and password in config file
|
||||
when: '"Action: Check BMC username and password in config file" in script_result.stdout'
|
||||
|
||||
- name: Set output msg if BMC is unreachable.
|
||||
set_fact:
|
||||
failure_msg: >-
|
||||
Ping to BMC has failed.
|
||||
Check BMC values in install values file.
|
||||
Ensure you can ssh into BMC using these credentials.
|
||||
Err_code=ping_bmc
|
||||
when: '"Action: Check BMC ip address is pingable" in script_result.stdout'
|
||||
|
||||
- name: Set output message if session creation fails
|
||||
set_fact:
|
||||
failure_msg: >-
|
||||
Failed to connect to BMC.
|
||||
Check BMC credentials in install values file.
|
||||
Ensure you can ssh into BMC using these credentials.
|
||||
Err_code=bmc_cred
|
||||
when: '"Failed to Create session" in script_result.stdout'
|
||||
|
||||
- name: Set output message if the script execution times out
|
||||
set_fact:
|
||||
failure_msg: >-
|
||||
BMC operations timed out.
|
||||
Please review the script output to identify the operation
|
||||
that is currently stuck.
|
||||
Err_code=rvmc_timeout
|
||||
when: '"RVMC script execution timed out" in script_result.stdout'
|
||||
|
||||
- name: Set output message if failed to terminate the previous RVMC process
|
||||
set_fact:
|
||||
failure_msg: >-
|
||||
Failed to terminate the previous RVMC process.
|
||||
Please review the script output to find out the previous
|
||||
RVMC process ID.
|
||||
Err_code=rvmc_process
|
||||
when: '"Failed to terminate the previous process" in script_result.stdout'
|
||||
|
||||
- name: Fail if Redfish Virtual Media Controller returns an error
|
||||
fail:
|
||||
msg: |
|
||||
Failed to {{ operation_string }} the host via Redfish Virtual Media Controller.
|
||||
{{ failure_msg | default('The RVMC script exited with failure response or exception.') }}
|
||||
|
||||
when: script_result.rc != 0
|
||||
|
||||
delegate_to: localhost
|
||||
- name: Run RVMC script to insert boot image and power on host
|
||||
include_role:
|
||||
name: common/rvmc
|
||||
|
||||
- debug:
|
||||
msg: "Waiting for the system to {{ operation_string }}..."
|
||||
msg: "Waiting for the system to install..."
|
||||
|
||||
- name: Waiting {{ wait_for_timeout }} seconds for port {{ ansible_port }} become open on {{ host_to_check }}
|
||||
local_action:
|
||||
@@ -123,24 +33,7 @@
|
||||
delay={{ boot_wait_time }}
|
||||
timeout={{ wait_for_timeout }}
|
||||
state=started
|
||||
msg="Timeout waiting for {{ host_to_check }}:{{ ansible_port }}. Err_code=wait_{{ operation_string }}"
|
||||
|
||||
# TODO (srana): Now that enroll-init tasks diverge from install, consider separating enroll-init
|
||||
# from the install playbook. The RVMC script block should be made common so both install and
|
||||
# enroll-init can use it independently.
|
||||
- name: Wait for sysinv API to be ready on the reconfigured OAM endpoint
|
||||
local_action:
|
||||
module: uri
|
||||
url: "https://{{ host_to_check }}:{{ ansible_port }}/v1/isystems/region_id"
|
||||
method: GET
|
||||
validate_certs: no
|
||||
register: api_response
|
||||
retries: 10
|
||||
delay: 6
|
||||
until: api_response.status == 200
|
||||
# The caller (DCManager) will try and report an error in case of failure
|
||||
failed_when: false
|
||||
when: enroll_init
|
||||
msg="Timeout waiting for {{ host_to_check }}:{{ ansible_port }}. Err_code=wait_install"
|
||||
|
||||
- name: Run validate host playbook post install
|
||||
import_playbook: validate_host.yml
|
||||
@@ -152,4 +45,3 @@
|
||||
sync_patch_metadata: true
|
||||
sync_software_metadata: true
|
||||
enforce_password_change: true
|
||||
when: not enroll_init
|
||||
|
@@ -0,0 +1,94 @@
|
||||
---
|
||||
#
|
||||
# Copyright (c) 2024 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This role handles the execution of the RVMC script, including error handling
|
||||
# and reporting. It is primarily used for subcloud remote installation and
|
||||
# subcloud enrollment, where an image is inserted, and the subcloud is rebooted.
|
||||
#
|
||||
|
||||
# Run the RVMC script and convert its result into an actionable failure.
# Every task in this block is delegated to localhost.
#
# Variables read from the caller:
#   rvmc_config_file  - passed to the script as --config_file (required)
#   rvmc_debug_level  - passed as --debug, defaults to 0
#   eject_image_only  - when true, adds --eject_image_only to the command
#   job_retry_delay   - seconds to wait before the single retry; a random
#                       value in the range 0-119 is used when not provided
#   operation_string  - verb used in the failure message, defaults to 'install'
- block:
    - name: Run rvmc script
      script: >
        /usr/local/bin/rvmc_install.py
        --debug={{ rvmc_debug_level | default(0) }}
        --subcloud_name="{{ inventory_hostname }}"
        --config_file="{{ rvmc_config_file }}"
        {% if eject_image_only | default(false) | bool %}
        --eject_image_only
        {% endif %}
      # Script return code:
      # 0 - Success
      # 1 - Retryable failures
      # 2 - Non-retryable failures
      #     (eg. Invalid credentials, Script execution timeout in 30 minutes,
      #     Failed to terminate the previous process.)
      # Refer to rvmc_install.py in the distributedcloud repository for details.
      #
      # A result without a return code (the script could not be launched at
      # all) is treated as non-retryable instead of raising an undefined
      # variable error.
      until: script_result.rc | default(2) != 1
      register: script_result
      retries: 1
      delay: "{{ job_retry_delay | default(120 | random) }}"
      # Never fail here; the result is evaluated by the tasks below so that a
      # specific failure message can be reported.
      failed_when: false

    - name: Display script output
      debug:
        # Fall back to the module's own message when the script produced no
        # stdout.
        msg: "{{ script_result.stdout | default(script_result.msg | default('')) }}"

    # Block to isolate failure message
    - block:
        - name: Set credential failure flag
          set_fact:
            failure_msg: >-
              Credential failure.
              Action: Check BMC username and password in config file
          when: '"Action: Check BMC username and password in config file" in rvmc_script_stdout'

        - name: Set output msg if BMC is unreachable.
          set_fact:
            failure_msg: >-
              Ping to BMC has failed.
              Check BMC values in install values file.
              Ensure you can ssh into BMC using these credentials.
              Err_code=ping_bmc
          when: '"Action: Check BMC ip address is pingable" in rvmc_script_stdout'

        - name: Set output message if session creation fails
          set_fact:
            failure_msg: >-
              Failed to connect to BMC.
              Check BMC credentials in install values file.
              Ensure you can ssh into BMC using these credentials.
              Err_code=bmc_cred
          when: '"Failed to Create session" in rvmc_script_stdout'

        - name: Set output message if the script execution times out
          set_fact:
            failure_msg: >-
              BMC operations timed out.
              Please review the script output to identify the operation
              that is currently stuck.
              Err_code=rvmc_timeout
          when: '"RVMC script execution timed out" in rvmc_script_stdout'

        - name: Set output message if failed to terminate the previous RVMC process
          set_fact:
            failure_msg: >-
              Failed to terminate the previous RVMC process.
              Please review the script output to find out the previous
              RVMC process ID.
              Err_code=rvmc_process
          when: '"Failed to terminate the previous process" in rvmc_script_stdout'

        # Reports the specific message selected above, or a generic one when
        # none of the known patterns matched the script output.
        - name: Fail if Redfish Virtual Media Controller returns an error
          fail:
            msg: |
              Failed to {{ operation_string | default('install') }} the host via Redfish Virtual Media Controller.
              {{ failure_msg | default('The RVMC script exited with failure response or exception.') }}

      when: script_result.rc | default(2) != 0
      vars:
        # Script output with a safe empty default, shared by the pattern
        # checks in this block.
        rvmc_script_stdout: "{{ script_result.stdout | default('') }}"

  delegate_to: localhost
|
@@ -12,6 +12,10 @@
|
||||
include_role:
|
||||
name: common/check-services-status
|
||||
|
||||
# Runs the enroll-init-cleanup script with privilege escalation.
# NOTE(review): the script body is not visible here; per the task name it
# presumably cleans up the cloud-init services - confirm.
- name: Cleanup the cloud-init services
  command: /usr/local/bin/enroll-init-cleanup
  become: true
|
||||
|
||||
# Delegate the lock operation to the shared common/host-lock role.
- name: Lock controller-0
  include_role:
    name: common/host-lock
|
||||
|
Loading…
x
Reference in New Issue
Block a user