tripleo-common/workbooks/baremetal.yaml
Steve Baker c31faaa286 wait_for_introspection_to_finish_error set status FAILED
Currently when wait_for_introspection_to_finish_error is reached,
the _introspect workflow will pass as a success even when it
actually failed. This means that the retry logic in the calling
workflow is never triggered.

wait_for_introspection_to_finish_error is hit in downstream CI
when ironic-introspector is under too much load to respond
to status poll requests.

This change also ensures that callers of the
tripleo.baremetal.v1.introspect workflow can override the default
concurrency when required.

Change-Id: Ifd88ff9175bc6ca583e3826c59787680e25fbea3
Partial-Bug: #1836976
2019-07-24 00:48:12 +00:00

1493 lines
46 KiB
YAML

---
# Mistral workbook header: workflow language version, the workbook name that
# prefixes every workflow below (e.g. tripleo.baremetal.v1.provide), and a
# short description.
version: '2.0'
name: tripleo.baremetal.v1
description: TripleO Baremetal Workflows
# Every entry under this key is one named workflow.
workflows:
# Ask Ironic to apply `state_action` to one node, then poll the node until
# its provision_state is `target_state` or one of `error_states`.
# On failure the workflow fails and exposes the published message as
# `result` (see output-on-error).
set_node_state:
  input:
    - node_uuid
    - state_action
    - target_state
    - error_states:
        # The default includes all failure states, even unused by TripleO.
        - 'error'
        - 'adopt failed'
        - 'clean failed'
        - 'deploy failed'
        - 'inspect failed'
        - 'rescue failed'

  tags:
    - tripleo-common-managed

  output-on-error:
    result: <% $.message %>

  tasks:

    # Kick off the provision state change.
    set_provision_state:
      action: ironic.node_set_provision_state node_uuid=<% $.node_uuid %> state=<% $.state_action %>
      on-success: wait_for_provision_state
      on-error: set_provision_state_failed

    # Record the action's error as the message and fail the workflow.
    set_provision_state_failed:
      publish:
        message: <% task(set_provision_state).result %>
      on-complete: fail

    # Poll every 3 seconds (up to 400 times) while the node is in neither
    # the target state nor any error state.
    wait_for_provision_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
        fields: ['provision_state', 'last_error']
      timeout: 1200  # 20 minutes
      retry:
        delay: 3
        count: 400
        continue-on: <% not task().result.provision_state in [$.target_state] + $.error_states %>
      on-complete:
        # Only branch to the failure task when the target was not reached.
        - state_not_reached: <% task().result.provision_state != $.target_state %>

    # Build a message with the last observed state and error, then fail.
    state_not_reached:
      publish:
        message: >-
          Node <% $.node_uuid %> did not reach state "<% $.target_state %>",
          the state is "<% task(wait_for_provision_state).result.provision_state %>",
          error: <% task(wait_for_provision_state).result.last_error %>
      on-complete: fail
# Ask Ironic to apply the power action `state_action` to one node, then poll
# until its power_state is `target_state` or `error_state`.
# On failure the workflow fails and exposes the published message as
# `result` (see output-on-error).
set_power_state:
  input:
    - node_uuid
    - state_action
    - target_state
    - error_state: 'error'

  tags:
    - tripleo-common-managed

  output-on-error:
    result: <% $.message %>

  tasks:

    # Request the power state change.
    set_power_state:
      action: ironic.node_set_power_state node_id=<% $.node_uuid %> state=<% $.state_action %>
      on-success: wait_for_power_state
      on-error: set_power_state_failed

    # Record the action's error as the message and fail the workflow.
    set_power_state_failed:
      publish:
        message: <% task(set_power_state).result %>
      on-complete: fail

    # Poll every 6 seconds (up to 20 times) while the node is in neither the
    # target power state nor the error state.
    wait_for_power_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
        fields: ['power_state', 'last_error']
      timeout: 120  # 2 minutes
      retry:
        delay: 6
        count: 20
        continue-on: <% not task().result.power_state in [$.target_state, $.error_state] %>
      on-complete:
        # Only branch to the failure task when the target was not reached.
        - state_not_reached: <% task().result.power_state != $.target_state %>

    # Build a message with the last observed power state and error, then fail.
    state_not_reached:
      publish:
        message: >-
          Node <% $.node_uuid %> did not reach power state "<% $.target_state %>",
          the state is "<% task(wait_for_power_state).result.power_state %>",
          error: <% task(wait_for_power_state).result.last_error %>
      on-complete: fail
# Run the given manual clean steps on one node and wait for it to return to
# the 'manageable' provision state. The outcome is reported through
# tripleo.messaging.v1.send on `queue_name`.
manual_cleaning:
  input:
    - node_uuid
    - clean_steps
    - timeout: 7200  # 2 hours (cleaning can take really long)
    - retry_delay: 10
    - retry_count: 720
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Start cleaning with the caller-supplied steps.
    set_provision_state:
      action: ironic.node_set_provision_state node_uuid=<% $.node_uuid %> state='clean' cleansteps=<% $.clean_steps %>
      on-success: wait_for_provision_state
      on-error: set_provision_state_failed

    set_provision_state_failed:
      publish:
        status: FAILED
        message: <% task(set_provision_state).result %>
      on-success: send_message

    # Poll until the node is 'manageable' again; polling cadence and overall
    # timeout come from the workflow inputs.
    wait_for_provision_state:
      action: ironic.node_get node_id=<% $.node_uuid %>
      timeout: <% $.timeout %>
      retry:
        delay: <% $.retry_delay %>
        count: <% $.retry_count %>
        continue-on: <% task().result.provision_state != 'manageable' %>
      on-complete:
        - send_message: <% task().result.provision_state = 'manageable' %>
        - state_not_reached: <% task().result.provision_state != 'manageable' %>

    state_not_reached:
      publish:
        status: FAILED
        message: Cleaning of node <% $.node_uuid %> timed out.
      on-complete: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Run the tripleo.baremetal.validate_nodes action against `nodes_json` and
# report the outcome through tripleo.messaging.v1.send on `queue_name`.
validate_nodes:
  description: Validate nodes JSON

  input:
    - nodes_json
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    validate_nodes:
      action: tripleo.baremetal.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
      on-success: send_message
      on-error: validation_failed

    # Forward the validation error as the message.
    validation_failed:
      publish:
        status: FAILED
        message: <% task(validate_nodes).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when validation passed.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Validate `nodes_json`, register or update the nodes, then move the newly
# enrolled ones towards `initial_state`:
#   enroll     -> stop after registration
#   manageable -> run the manage workflow
#   available  -> run manage, then provide
# The outcome and the registered nodes are reported through
# tripleo.messaging.v1.send on `queue_name`.
register_or_update:
  description: Take nodes JSON and create nodes in a "manageable" state

  input:
    - nodes_json
    - remove: False
    - queue_name: tripleo
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: null
    - initial_state: manageable

  tags:
    - tripleo-common-managed

  tasks:

    validate_input:
      workflow: tripleo.baremetal.v1.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
        queue_name: <% $.queue_name %>
      on-success: register_or_update_nodes
      on-error: validation_failed

    validation_failed:
      publish:
        status: FAILED
        message: <% task(validate_input).result %>
        registered_nodes: []
      on-success: send_message

    register_or_update_nodes:
      action: tripleo.baremetal.register_or_update_nodes
      input:
        nodes_json: <% $.nodes_json %>
        remove: <% $.remove %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
      publish:
        registered_nodes: <% task().result %>
        # Only nodes still in 'enroll' are moved on by the later tasks.
        new_nodes: <% task().result.where($.provision_state = 'enroll') %>
      on-success:
        - set_nodes_managed: <% $.initial_state != "enroll" %>
        - send_message: <% $.initial_state = "enroll" %>
      on-error: set_status_failed_register_or_update_nodes

    set_status_failed_register_or_update_nodes:
      publish:
        status: FAILED
        message: <% task(register_or_update_nodes).result %>
        registered_nodes: []
      on-success: send_message

    set_nodes_managed:
      workflow: tripleo.baremetal.v1.manage
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: <% $.new_nodes.len() %> node(s) successfully moved to the "manageable" state.
      on-success:
        - set_nodes_available: <% $.initial_state = "available" %>
        - send_message: <% $.initial_state != "available" %>
      on-error: set_status_failed_nodes_managed

    set_status_failed_nodes_managed:
      publish:
        status: FAILED
        message: <% task(set_nodes_managed).result %>
      on-success: send_message

    set_nodes_available:
      workflow: tripleo.baremetal.v1.provide node_uuids=<% $.new_nodes.uuid %> queue_name=<% $.queue_name %>
      publish:
        status: SUCCESS
        message: <% $.new_nodes.len() %> node(s) successfully moved to the "available" state.
      on-success: send_message
      on-error: set_status_failed_nodes_available

    set_status_failed_nodes_available:
      publish:
        status: FAILED
        message: <% task(set_nodes_available).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { registered_nodes => $.registered_nodes or [] } %>
# Move every node in `node_uuids` to 'available', run cell_v2 host
# discovery, then try to power the nodes off. The outcome is reported
# through tripleo.messaging.v1.send on `queue_name`.
#
# NOTE(review): queue_name is passed to set_node_state, set_power_state and
# cellv2_discovery below, but none of those workflows declares a queue_name
# input in this file -- confirm the engine tolerates undeclared inputs.
provide:
  description: Take a list of nodes and move them to "available"

  input:
    - node_uuids
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    set_nodes_available:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_node_state
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        state_action: 'provide'
        target_state: 'available'
      on-success: cell_v2_discover_hosts
      on-error: set_status_failed_nodes_available

    set_status_failed_nodes_available:
      publish:
        status: FAILED
        message: <% task(set_nodes_available).result %>
      on-success: send_message

    # Retried every 30 seconds, up to 30 times.
    cell_v2_discover_hosts:
      workflow: tripleo.baremetal.v1.cellv2_discovery
      input:
        node_uuids: <% $.node_uuids %>
        queue_name: <% $.queue_name %>
      timeout: 900  # 15 minutes
      retry:
        delay: 30
        count: 30
      on-success: try_power_off
      on-error: cell_v2_discover_hosts_failed

    cell_v2_discover_hosts_failed:
      publish:
        status: FAILED
        message: <% task(cell_v2_discover_hosts).result %>
      on-success: send_message

    try_power_off:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_power_state
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        state_action: 'off'
        target_state: 'power off'
      publish:
        status: SUCCESS
        message: <% $.node_uuids.len() %> node(s) successfully moved to the "available" state.
      on-success: send_message
      on-error: power_off_failed

    power_off_failed:
      publish:
        status: FAILED
        message: <% task(try_power_off).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# List unassociated, non-maintenance nodes, keep those whose provision_state
# is 'manageable', and hand their UUIDs to the provide workflow.
provide_manageable_nodes:
  description: Provide all nodes in a 'manageable' state.

  input:
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: provide_manageable
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    # No on-error transition is defined for this task.
    provide_manageable:
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Run set_node_state with the 'manage' action for every node in
# `node_uuids`, waiting for 'manageable'. The outcome is reported through
# tripleo.messaging.v1.send on `queue_name`.
manage:
  description: Set a list of nodes to 'manageable' state

  input:
    - node_uuids
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    set_nodes_manageable:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_node_state
      input:
        node_uuid: <% $.uuid %>
        state_action: 'manage'
        target_state: 'manageable'
        # Overrides the sub-workflow's default list of error states.
        error_states:
          # node going back to enroll designates power credentials failure
          - 'enroll'
          - 'error'
      on-success: send_message
      on-error: set_status_failed_nodes_manageable

    set_status_failed_nodes_manageable:
      publish:
        status: FAILED
        message: <% task(set_nodes_manageable).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Introspect a single node: start introspection, wait for it to finish, and
# report status, message and the node's result through
# tripleo.messaging.v1.send. All three inputs are required (no defaults).
_introspect:
  description: >
    An internal workflow. The tripleo.baremetal.v1.introspect workflow
    should be used for introspection.

  input:
    - node_uuid
    - timeout
    - queue_name

  output:
    result: <% task(start_introspection).result %>

  tags:
    - tripleo-common-managed

  tasks:

    start_introspection:
      action: baremetal_introspection.introspect uuid=<% $.node_uuid %>
      on-success: wait_for_introspection_to_finish
      on-error: set_status_failed_start_introspection

    set_status_failed_start_introspection:
      publish:
        status: FAILED
        message: <% task(start_introspection).result %>
        introspected_nodes: []
      on-success: send_message

    wait_for_introspection_to_finish:
      action: baremetal_introspection.wait_for_finish
      input:
        uuids: <% [$.node_uuid] %>
        # The interval is 10 seconds, so divide to make the overall timeout
        # in seconds correct.
        max_retries: <% $.timeout / 10 %>
        retry_interval: 10
      publish:
        introspected_node: <% task().result.values().first() %>
        # The action can succeed while the node itself reports an error.
        status: <% bool(task().result.values().first().error) and "FAILED" or "SUCCESS" %>
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      on-success: wait_for_introspection_to_finish_success
      on-error: wait_for_introspection_to_finish_error

    wait_for_introspection_to_finish_success:
      publish:
        message: <% "Introspection of node {0} completed. Status:{1}. Errors:{2}".format($.introspected_node.uuid, $.status, $.introspected_node.error) %>
      on-success: send_message

    # Publishes FAILED explicitly so the failure is what gets reported.
    wait_for_introspection_to_finish_error:
      publish:
        status: FAILED
        message: <% "Introspection of node {0} failed.".format($.node_uuid) %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { introspected_node => $.get('introspected_node'), node_uuid => $.node_uuid } %>
# Introspect a list of nodes with bounded concurrency and retry the ones
# that failed. Optionally runs the 'pre-introspection' validation group
# first. Progress and the final outcome are reported through
# tripleo.messaging.v1.send on `queue_name`.
introspect:
  description: >
    Take a list of nodes and move them through introspection.
    By default each node will attempt introspection up to 3 times (two
    retries plus the initial attempt) if it fails. This behaviour can be
    modified by changing the max_retry_attempts input.
    The workflow will assume the node has timed out after 20 minutes (1200
    seconds). This can be changed by passing the node_timeout input in
    seconds.

  input:
    - node_uuids
    - run_validations: False
    - queue_name: tripleo
    - concurrency: 20
    - max_retry_attempts: 2
    - node_timeout: 1200

  tags:
    - tripleo-common-managed

  tasks:

    # Seed the retry counter, then branch on whether validations are wanted.
    initialize:
      publish:
        introspection_attempt: 0
      on-complete:
        - run_validations: <% $.run_validations %>
        - introspect_nodes: <% not $.run_validations %>

    run_validations:
      workflow: tripleo.validations.v1.run_groups
      input:
        group_names:
          - 'pre-introspection'
        queue_name: <% $.queue_name %>
      on-success: introspect_nodes
      on-error: set_validations_failed

    set_validations_failed:
      publish:
        status: FAILED
        message: <% task(run_validations).result %>
      on-success: send_message

    introspect_nodes:
      with-items: uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: _introspect
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        timeout: <% $.node_timeout %>
      # on-error is triggered if one or more nodes failed introspection. We
      # still go to get_introspection_status as it will collect the result
      # for each node. Unless we hit the retry limit.
      on-error:
        - get_introspection_status: <% $.introspection_attempt <= $.max_retry_attempts %>
        - max_retry_attempts_reached: <% $.introspection_attempt > $.max_retry_attempts %>
      on-success: get_introspection_status

    get_introspection_status:
      with-items: uuid in <% $.node_uuids %>
      action: baremetal_introspection.get_status
      input:
        uuid: <% $.uuid %>
      publish:
        introspected_nodes: <% task().result.toDict($.uuid, $) %>
        # Currently there is no way for us to ignore user introspection
        # aborts. This means we will retry aborted nodes until the Ironic API
        # gives us more details (error code or a boolean to show aborts etc.)
        # If a node hasn't finished, we consider it to be failed.
        # TODO(d0ugal): When possible, don't retry introspection of nodes
        # that a user manually aborted.
        failed_introspection: <% task().result.where($.finished = true and $.error != null).select($.uuid) + task().result.where($.finished = false).select($.uuid) %>
      publish-on-error:
        # If a node fails to start introspection, getting the status can fail.
        # When that happens, the result is a string and the nodes need to be
        # filtered out.
        introspected_nodes: <% task().result.where(isDict($)).toDict($.uuid, $) %>
        # If there was an error, the exception string we get doesn't give us
        # the UUID. So we use a set difference to find the UUIDs missing in
        # the results. These are then added to the failed nodes.
        # Every operand is reduced to UUIDs with select($.uuid): this value
        # becomes node_uuids in retry_failed_nodes, so it must hold UUIDs
        # only, exactly like the success-path expression above.
        failed_introspection: <% ($.node_uuids.toSet() - task().result.where(isDict($)).select($.uuid).toSet()) + task().result.where(isDict($)).where($.finished = true and $.error != null).select($.uuid).toSet() + task().result.where(isDict($)).where($.finished = false).select($.uuid).toSet() %>
      on-error: increase_attempt_counter
      on-success:
        - successful_introspection: <% $.failed_introspection.len() = 0 %>
        - increase_attempt_counter: <% $.failed_introspection.len() > 0 %>

    increase_attempt_counter:
      publish:
        introspection_attempt: <% $.introspection_attempt + 1 %>
      on-complete: retry_failed_nodes

    retry_failed_nodes:
      publish:
        status: RUNNING
        message: <% 'Retrying {0} nodes that failed introspection. Attempt {1} of {2} '.format($.failed_introspection.len(), $.introspection_attempt, $.max_retry_attempts + 1) %>
        # We are about to retry, update the tracking stats.
        node_uuids: <% $.failed_introspection %>
      # Both transitions fire: a progress message is sent and the failed
      # nodes go through introspection again.
      on-success:
        - send_message
        - introspect_nodes

    max_retry_attempts_reached:
      publish:
        status: FAILED
        message: <% 'Retry limit reached with {0} nodes still failing introspection'.format($.failed_introspection.len()) %>
      on-complete: send_message

    successful_introspection:
      publish:
        status: SUCCESS
        message: Successfully introspected <% $.introspected_nodes.len() %> node(s).
      on-complete: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { introspected_nodes => $.get('introspected_nodes', []), failed_introspection => $.get('failed_introspection', []) } %>
# List unassociated, non-maintenance nodes, keep those whose provision_state
# is 'manageable', and run the introspect workflow on them. Reports FAILED
# when no such node exists.
introspect_manageable_nodes:
  description: Introspect all nodes in a 'manageable' state.

  input:
    - run_validations: False
    - queue_name: tripleo
    - concurrency: 20

  tags:
    - tripleo-common-managed

  tasks:

    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: validate_nodes
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    # Routing-only task: it has no action, it just branches on whether any
    # manageable node was found.
    validate_nodes:
      on-success:
        - introspect_manageable: <% $.managed_nodes.len() > 0 %>
        - set_status_failed_no_nodes: <% $.managed_nodes.len() = 0 %>

    set_status_failed_no_nodes:
      publish:
        status: FAILED
        message: No manageable nodes to introspect. Check node states and maintenance.
      on-success: send_message

    introspect_manageable:
      workflow: tripleo.baremetal.v1.introspect
      input:
        node_uuids: <% $.managed_nodes %>
        run_validations: <% $.run_validations %>
        queue_name: <% $.queue_name %>
        concurrency: <% $.concurrency %>
      publish:
        introspected_nodes: <% task().result.introspected_nodes %>
      on-success: send_message
      on-error: set_status_introspect_manageable

    set_status_introspect_manageable:
      publish:
        status: FAILED
        message: <% task(introspect_manageable).result %>
        introspected_nodes: []
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { introspected_nodes => $.get('introspected_nodes', []) } %>
# For every node in `node_uuids`, run the configure_boot action and then the
# configure_root_device action. The outcome is reported through
# tripleo.messaging.v1.send on `queue_name`.
configure:
  description: Take a list of manageable nodes and update their boot configuration.

  input:
    - node_uuids
    - queue_name: tripleo
    - kernel_name: bm-deploy-kernel
    - ramdisk_name: bm-deploy-ramdisk
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: False

  tags:
    - tripleo-common-managed

  tasks:

    configure_boot:
      with-items: node_uuid in <% $.node_uuids %>
      action: tripleo.baremetal.configure_boot node_uuid=<% $.node_uuid %> kernel_name=<% $.kernel_name %> ramdisk_name=<% $.ramdisk_name %> instance_boot_option=<% $.instance_boot_option %>
      on-success: configure_root_device
      on-error: set_status_failed_configure_boot

    set_status_failed_configure_boot:
      publish:
        status: FAILED
        message: <% task(configure_boot).result %>
      on-success: send_message

    configure_root_device:
      with-items: node_uuid in <% $.node_uuids %>
      action: tripleo.baremetal.configure_root_device node_uuid=<% $.node_uuid %> root_device=<% $.root_device %> minimum_size=<% $.root_device_minimum_size %> overwrite=<% $.overwrite_root_device_hints %>
      publish:
        status: SUCCESS
        message: 'Successfully configured the nodes.'
      on-success: send_message
      on-error: set_status_failed_configure_root_device

    set_status_failed_configure_root_device:
      publish:
        status: FAILED
        message: <% task(configure_root_device).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# List unassociated, non-maintenance nodes, keep those whose provision_state
# is 'manageable', and pass them with all boot/root-device inputs to the
# configure workflow.
configure_manageable_nodes:
  description: Update the boot configuration of all nodes in 'manageable' state.

  input:
    - queue_name: tripleo
    - kernel_name: 'bm-deploy-kernel'
    - ramdisk_name: 'bm-deploy-ramdisk'
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: False

  tags:
    - tripleo-common-managed

  tasks:

    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: configure_manageable
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    # Only a message is published on success; status is left to the
    # send_message default.
    configure_manageable:
      workflow: tripleo.baremetal.v1.configure
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        root_device: <% $.root_device %>
        root_device_minimum_size: <% $.root_device_minimum_size %>
        overwrite_root_device_hints: <% $.overwrite_root_device_hints %>
      publish:
        message: 'Manageable nodes configured successfully.'
      on-success: send_message
      on-error: set_status_failed_configure_manageable

    set_status_failed_configure_manageable:
      publish:
        status: FAILED
        message: <% task(configure_manageable).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Set the 'profile' capability of one node to `role` (null by default) and
# report the outcome through tripleo.messaging.v1.send on `queue_name`.
tag_node:
  description: Tag a node with a role

  input:
    - node_uuid
    - role: null
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    update_node:
      action: tripleo.baremetal.update_node_capability node_uuid=<% $.node_uuid %> capability='profile' value=<% $.role %>
      publish:
        message: <% task().result %>
        status: SUCCESS
      # Without this, a failed action would leave status unset and
      # send_message would fall back to SUCCESS with an empty message,
      # because the transition below is on-complete.
      publish-on-error:
        message: <% task().result %>
        status: FAILED
      on-complete: send_message

    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Tag `tag_node_uuids` with `role`, untag `untag_node_uuids` (tag_node is
# called without a role for these), then update the role parameters in the
# `plan` container. Nodes are processed one at a time.
tag_nodes:
  description: Runs the tag_node workflow in a loop

  input:
    - tag_node_uuids
    - untag_node_uuids
    - role
    - plan: overcloud
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    tag_nodes:
      with-items: node_uuid in <% $.tag_node_uuids %>
      concurrency: 1
      workflow: tripleo.baremetal.v1.tag_node
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
        role: <% $.role %>
      on-success: untag_nodes
      on-error: send_message

    untag_nodes:
      with-items: node_uuid in <% $.untag_node_uuids %>
      concurrency: 1
      workflow: tripleo.baremetal.v1.tag_node
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
      on-success: update_role_parameters
      on-error: send_message

    update_role_parameters:
      action: tripleo.parameters.update_role role=<% $.role %> container=<% $.plan %>
      publish:
        message: <% task().result %>
        status: SUCCESS
      on-complete: send_message

    # Unlike the other workflows in this workbook, status falls back to
    # FAILED here: the on-error routes above publish nothing, and only
    # update_role_parameters publishes SUCCESS.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'FAILED') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# List non-maintenance nodes in the 'available' and 'active' provision
# states, fetch each node's hint, and report the UUIDs whose hint matches
# `hint_regex` as `matching_nodes` in the message payload.
nodes_with_hint:
  description: Find nodes matching a hint regex

  input:
    - hint_regex
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # One node_list call per provision state.
    get_nodes:
      with-items: provision_state in <% ['available', 'active'] %>
      action: ironic.node_list maintenance=false provision_state=<% $.provision_state %> detail=true
      on-success: get_matching_nodes
      on-error: set_status_failed_get_nodes

    set_status_failed_get_nodes:
      publish:
        status: FAILED
        message: <% task(get_nodes).result %>
      on-success: send_message

    # The per-state results are flattened into a single list of nodes.
    get_matching_nodes:
      with-items: node in <% task(get_nodes).result.flatten() %>
      action: tripleo.baremetal.get_node_hint node=<% $.node %>
      publish:
        matching_nodes: <% let(hint_regex => $.hint_regex) -> task().result.where($.hint and $.hint.matches($hint_regex)).uuid %>
      on-success: send_message
      on-error: set_status_failed_get_matching_nodes

    set_status_failed_get_matching_nodes:
      publish:
        status: FAILED
        message: <% task(get_matching_nodes).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload:
          matching_nodes: <% $.matching_nodes or [] %>
# List non-maintenance 'active' and 'available' nodes, fetch each node's
# profile, and report the UUIDs whose profile equals `profile` as
# `matching_nodes` in the message payload.
nodes_with_profile:
  description: Find nodes with a specific profile

  input:
    - profile
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    get_active_nodes:
      action: ironic.node_list maintenance=false provision_state='active' detail=true
      on-success: get_available_nodes
      on-error: set_status_failed_get_active_nodes

    set_status_failed_get_active_nodes:
      publish:
        status: FAILED
        message: <% task(get_active_nodes).result %>
      on-success: send_message

    get_available_nodes:
      action: ironic.node_list maintenance=false provision_state='available' detail=true
      on-success: get_matching_nodes
      on-error: set_status_failed_get_available_nodes

    set_status_failed_get_available_nodes:
      publish:
        status: FAILED
        message: <% task(get_available_nodes).result %>
      on-success: send_message

    # Iterates over the available nodes followed by the active nodes.
    get_matching_nodes:
      with-items: node in <% task(get_available_nodes).result + task(get_active_nodes).result %>
      action: tripleo.baremetal.get_profile node=<% $.node %>
      publish:
        matching_nodes: <% let(input_profile_name => $.profile) -> task().result.where($.profile = $input_profile_name).uuid %>
      on-success: send_message
      on-error: set_status_failed_get_matching_nodes

    set_status_failed_get_matching_nodes:
      publish:
        status: FAILED
        message: <% task(get_matching_nodes).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { matching_nodes => $.matching_nodes or [] } %>
# Store `configuration` as the target RAID config on every node in
# `node_uuids`, then apply it by running manual cleaning with the raid
# delete_configuration and create_configuration steps.
create_raid_configuration:
  description: Create and apply RAID configuration for given nodes

  input:
    - node_uuids
    - configuration
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    set_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      action: ironic.node_set_target_raid_config node_ident=<% $.node_uuid %> target_raid_config=<% $.configuration %>
      on-success: apply_configuration
      on-error: set_configuration_failed

    set_configuration_failed:
      publish:
        status: FAILED
        message: <% task(set_configuration).result %>
      on-success: send_message

    apply_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: raid
            step: delete_configuration
          - interface: raid
            step: create_configuration
        timeout: 1800  # building RAID should be faster than general cleaning
        retry_count: 180
        retry_delay: 10
      publish:
        message: <% task().result %>
        status: SUCCESS
      on-success: send_message
      on-error: apply_configuration_failed

    apply_configuration_failed:
      publish:
        status: FAILED
        message: <% task(apply_configuration).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
# Run cell_v2 host discovery, resolve each entry of `node_uuids` to the
# node's uuid via ironic.node_get, then look up a hypervisor with that
# hostname for every node. No failure transitions or messaging are defined.
cellv2_discovery:
  description: Run cell_v2 host discovery

  input:
    - node_uuids

  tags:
    - tripleo-common-managed

  tasks:

    cell_v2_discover_hosts:
      action: tripleo.baremetal.cell_v2_discover_hosts
      on-success: name_to_uuids

    # Overwrites node_uuids in the context with the uuid field of each
    # fetched node.
    name_to_uuids:
      with-items: node in <% $.node_uuids %>
      action: ironic.node_get
      input:
        node_id: <% $.node %>
        fields: ['uuid']
      publish:
        node_uuids: <% task().result.uuid %>
      on-success: wait_for_nova_resources

    wait_for_nova_resources:
      with-items: node_uuid in <% $.node_uuids %>
      action: nova.hypervisors_find hypervisor_hostname=<% $.node_uuid %>
# Fetch the existing nodes, compute candidate nodes from `ip_addresses`,
# `credentials` and `ports`, probe every candidate, and report the non-null
# probe results as `nodes_json` in the message payload.
discover_nodes:
  description: Run nodes discovery over the given IP range

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        limit: 0
      publish:
        existing_nodes: <% task().result %>
      on-success: get_candidate_nodes
      on-error: get_all_nodes_failed

    get_all_nodes_failed:
      publish:
        status: FAILED
        message: <% task(get_all_nodes).result %>
      on-success: send_message

    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      publish:
        candidates: <% task().result %>
      on-success: probe_nodes
      on-error: get_candidate_nodes_failed

    get_candidate_nodes_failed:
      publish:
        status: FAILED
        message: <% task(get_candidate_nodes).result %>
      on-success: send_message

    probe_nodes:
      with-items:
        - node in <% $.candidates %>
      action: tripleo.baremetal.probe_node
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      publish:
        # Null probe results are dropped.
        nodes_json: <% task().result.where($ != null) %>
      on-success: send_message
      on-error: probe_nodes_failed

    probe_nodes_failed:
      publish:
        status: FAILED
        message: <% task(probe_nodes).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { nodes_json => $.get('nodes_json', []) } %>
# Chain the discover_nodes workflow into register_or_update: discovered
# nodes are enrolled with the given boot options and `initial_state`, and
# the registered nodes are reported in the message payload.
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: null
    - initial_state: manageable
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      publish:
        nodes_json: <% task().result.nodes_json %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed

    discover_nodes_failed:
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>
      on-success: send_message

    # queue_name is not forwarded here, so the sub-workflow uses its own
    # default.
    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
      publish:
        registered_nodes: <% task().result.registered_nodes %>
      on-success: send_message
      on-error: enroll_nodes_failed

    enroll_nodes_failed:
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>
      on-success: send_message

    # status/message fall back to SUCCESS/'' when no task published them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload: <% { registered_nodes => $.get('registered_nodes', []) } %>
# Run manual cleaning with the deploy erase_devices_metadata step on every
# node in `node_uuids`, at most `concurrency` nodes at a time, and report
# the outcome through tripleo.messaging.v1.send on `queue_name`.
clean_nodes:
  description: Wipe partition tables on all disks of given nodes

  input:
    - node_uuids
    - concurrency: 20
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    clean_nodes:
      # Let mistral handle concurrency
      with-items: node_uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: deploy
            step: erase_devices_metadata
        # Lower timeout, since metadata cleaning takes less time
        timeout: 1200  # 20 minutes
        retry_delay: 10
        retry_count: 120
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      on-complete: send_message

    # status/message fall back to SUCCESS/'' when cleaning did not fail.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
clean_manageable_nodes:
  description: Clean all nodes in a 'manageable' state.

  # concurrency is forwarded to the clean_nodes sub-workflow.
  input:
    - concurrency: 20
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # List nodes that are manageable, not in maintenance and not associated
    # with an instance, and publish their UUIDs.
    get_manageable_nodes:
      action: ironic.node_list
      input:
        maintenance: false
        associated: false
        provision_state: manageable
      on-success: clean_manageable
      on-error: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      publish:
        cleaned_nodes: <% task().result.uuid %>

    # Delegate the actual cleaning to the clean_nodes workflow.
    clean_manageable:
      workflow: tripleo.baremetal.v1.clean_nodes
      input:
        node_uuids: <% $.cleaned_nodes %>
        concurrency: <% $.concurrency %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
      # Record the failure so that send_message reports FAILED instead of
      # the workflow erroring out without notifying the queue.
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      # on-complete (not on-success): the final message must be sent on
      # both the success and the error path.
      on-complete: send_message

    # status/message default to SUCCESS/'' when nothing published them;
    # cleaned_nodes defaults to [] when listing the nodes failed.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload:
          cleaned_nodes: <% $.get('cleaned_nodes', []) %>
apply_bios_settings:
  description: Apply BIOS settings for given nodes

  # node_uuids and settings are required. concurrency is optional and bounds
  # how many nodes are processed at the same time, mirroring clean_nodes.
  input:
    - node_uuids
    - settings
    - concurrency: 20
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Runs the manual_cleaning sub-workflow once per entry of node_uuids with
    # a single BIOS apply_configuration clean step.
    apply_configuration:
      # Let mistral handle concurrency
      with-items: node_uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: bios
            step: apply_configuration
            # NOTE(review): the settings input is passed through unchanged as
            # the clean step arguments; confirm callers supply it in the shape
            # the BIOS interface expects.
            args: <% $.settings %>
      # Report the outcome whether the step succeeded or not.
      on-complete: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>

    # status/message default to SUCCESS/'' unless publish-on-error set them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
apply_bios_settings_on_manageable_nodes:
  description: Apply BIOS settings on manageable nodes

  # settings is required; concurrency bounds how many nodes are processed at
  # the same time.
  input:
    - settings
    - concurrency: 20
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # List nodes that are manageable, not in maintenance and not associated
    # with an instance, and publish their UUIDs.
    get_manageable_nodes:
      action: ironic.node_list
      input:
        maintenance: false
        associated: false
        provision_state: manageable
      on-success: apply_configuration_manageable
      on-error: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      publish:
        applied_nodes: <% task().result.uuid %>

    # manual_cleaning operates on a single node (node_uuid), so iterate over
    # the published UUIDs here instead of handing it the whole list, and let
    # mistral enforce the concurrency limit on the iteration.
    apply_configuration_manageable:
      with-items: node_uuid in <% $.applied_nodes %>
      concurrency: <% $.concurrency %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: bios
            step: apply_configuration
            # NOTE(review): the settings input is passed through unchanged as
            # the clean step arguments; confirm callers supply it in the shape
            # the BIOS interface expects.
            args: <% $.settings %>
      # Report the outcome whether the step succeeded or not.
      on-complete: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>

    # status/message default to SUCCESS/'' when nothing published them;
    # applied_nodes defaults to [] when listing the nodes failed.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload:
          applied_nodes: <% $.get('applied_nodes', []) %>
reset_bios_settings:
  description: Reset BIOS settings for given nodes to factory default

  # node_uuids is required. concurrency is optional and bounds how many nodes
  # are processed at the same time, mirroring clean_nodes.
  input:
    - node_uuids
    - concurrency: 20
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Runs the manual_cleaning sub-workflow once per entry of node_uuids with
    # a single BIOS factory_reset clean step.
    factory_reset:
      # Let mistral handle concurrency
      with-items: node_uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: bios
            step: factory_reset
      # Report the outcome whether the step succeeded or not.
      on-complete: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>

    # status/message default to SUCCESS/'' unless publish-on-error set them.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
reset_bios_settings_on_manageable_nodes:
  description: Reset BIOS settings on manageable nodes to factory default

  # concurrency bounds how many nodes are processed at the same time.
  input:
    - concurrency: 20
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # List nodes that are manageable, not in maintenance and not associated
    # with an instance, and publish their UUIDs.
    get_manageable_nodes:
      action: ironic.node_list
      input:
        maintenance: false
        associated: false
        provision_state: manageable
      on-success: factory_reset_manageable
      on-error: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      publish:
        reset_nodes: <% task().result.uuid %>

    # manual_cleaning operates on a single node (node_uuid), so iterate over
    # the published UUIDs here instead of handing it the whole list, and let
    # mistral enforce the concurrency limit on the iteration.
    factory_reset_manageable:
      with-items: node_uuid in <% $.reset_nodes %>
      concurrency: <% $.concurrency %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: bios
            step: factory_reset
      # Report the outcome whether the step succeeded or not.
      on-complete: send_message
      publish-on-error:
        status: FAILED
        message: <% task().result %>

    # status/message default to SUCCESS/'' when nothing published them;
    # reset_nodes defaults to [] when listing the nodes failed.
    send_message:
      workflow: tripleo.messaging.v1.send
      input:
        queue_name: <% $.queue_name %>
        type: <% execution().name %>
        status: <% $.get('status', 'SUCCESS') %>
        message: <% $.get('message', '') %>
        execution: <% execution() %>
        payload:
          reset_nodes: <% $.get('reset_nodes', []) %>