tripleo-common/workbooks/baremetal.yaml
Dougal Matthews 7a18b486a1 Correct the failed status in the baremetal workflow
Two tasks in the introspect workflow have incorrectly been sending
"ERROR" statuses when they should have sent "FAILED". This then meant
the workflow appeared to finish without errors (or failures). This is
primarily a problem for the introspect_manageable_nodes workflow which
then can't detect errors and reports that everything was successful.

Change-Id: I34a91dd14bb19775ad62271def6ecb66398c84db
Closes-Bug: #1733303
2017-11-20 11:30:14 +00:00

1240 lines
38 KiB
YAML

---
# Workbook header: DSL version, the namespace under which the workflows
# below are registered, and a human readable summary.
version: "2.0"
name: tripleo.baremetal.v1
description: TripleO Baremetal Workflows
# Every workflow of this workbook is defined under this key.
workflows:
# Requests a provision-state action for a single node and then polls the
# node until its provision_state matches target_state.
set_node_state:
  input:
    - node_uuid
    - state_action
    - target_state
  tags:
    - tripleo-common-managed
  tasks:
    # Kick off the requested state transition.
    set_provision_state:
      action: ironic.node_set_provision_state
      input:
        node_uuid: <% $.node_uuid %>
        state: <% $.state_action %>
      on-success: wait_for_provision_state

    # Re-read the node while it has not reached the target state: at most
    # 400 attempts, 3 seconds apart, bounded by the overall task timeout.
    wait_for_provision_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
      timeout: 1200  # 20 minutes
      retry:
        count: 400
        delay: 3
        continue-on: <% task(wait_for_provision_state).result.provision_state != $.target_state %>
# Requests a power-state action for a single node and then polls the node
# until its power_state matches target_state.
set_power_state:
  input:
    - node_uuid
    - state_action
    - target_state
  tags:
    - tripleo-common-managed
  tasks:
    # Despite its name, this task changes the *power* state of the node.
    set_provision_state:
      action: ironic.node_set_power_state
      input:
        node_id: <% $.node_uuid %>
        state: <% $.state_action %>
      on-success: wait_for_power_state

    # Re-read the node while the power state differs from the target: at
    # most 20 attempts, 6 seconds apart, bounded by the task timeout.
    wait_for_power_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
      timeout: 120  # 2 minutes
      retry:
        count: 20
        delay: 6
        continue-on: <% task(wait_for_power_state).result.power_state != $.target_state %>
# Starts manual cleaning of one node with the given clean steps, waits for
# the node to return to the 'manageable' state and posts the outcome to the
# queue. The polling budget is configurable through the inputs.
manual_cleaning:
  input:
    - node_uuid
    - clean_steps
    - timeout: 7200  # 2 hours (cleaning can take really long)
    - retry_delay: 10
    - retry_count: 720
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    set_provision_state:
      action: ironic.node_set_provision_state
      input:
        node_uuid: <% $.node_uuid %>
        state: 'clean'
        cleansteps: <% $.clean_steps %>
      on-success: wait_for_provision_state
      on-error: set_provision_state_failed

    # Record the failure details so send_message reports them.
    set_provision_state_failed:
      publish:
        status: FAILED
        message: <% task(set_provision_state).result %>
      on-success: send_message

    # Poll the node until it is 'manageable' again.
    wait_for_provision_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
      timeout: <% $.timeout %>
      retry:
        count: <% $.retry_count %>
        delay: <% $.retry_delay %>
        continue-on: <% task().result.provision_state != 'manageable' %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.manual_cleaning
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Runs the node JSON validation action and posts the outcome to the queue.
validate_nodes:
  description: Validate nodes JSON
  input:
    - nodes_json
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    validate_nodes:
      action: tripleo.baremetal.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
      on-success: send_message
      on-error: validation_failed

    # Record the validation error so send_message reports it.
    validation_failed:
      publish:
        status: FAILED
        message: <% task(validate_nodes).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.validate_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Validates the nodes JSON, registers or updates the nodes and, depending on
# initial_state, moves the nodes found in 'enroll' state to manageable and
# then to available. The outcome is always posted to the queue.
register_or_update:
  description: Take nodes JSON and create nodes in a "manageable" state
  input:
    - nodes_json
    - remove: false
    - queue_name: tripleo
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
  tags:
    - tripleo-common-managed
  tasks:
    validate_input:
      workflow: tripleo.baremetal.v1.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
        queue_name: <% $.queue_name %>
      on-success: register_or_update_nodes
      on-error: validation_failed

    validation_failed:
      publish:
        status: FAILED
        message: <% task(validate_input).result %>
        registered_nodes: []
      on-success: send_message

    register_or_update_nodes:
      action: tripleo.baremetal.register_or_update_nodes
      input:
        nodes_json: <% $.nodes_json %>
        remove: <% $.remove %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
      publish:
        registered_nodes: <% task().result %>
        # Only nodes still in 'enroll' are moved on by the later tasks.
        new_nodes: <% task().result.where($.provision_state = 'enroll') %>
      # Stop right after registration when the caller asked for 'enroll'.
      on-success:
        - set_nodes_managed: <% $.initial_state != "enroll" %>
        - send_message: <% $.initial_state = "enroll" %>
      on-error: set_status_failed_register_or_update_nodes

    set_status_failed_register_or_update_nodes:
      publish:
        status: FAILED
        message: <% task(register_or_update_nodes).result %>
        registered_nodes: []
      on-success: send_message

    set_nodes_managed:
      workflow: tripleo.baremetal.v1.manage
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: Nodes set to managed.
      # Continue to 'available' only when that is the requested end state.
      on-success:
        - set_nodes_available: <% $.initial_state = "available" %>
        - send_message: <% $.initial_state != "available" %>
      on-error: set_status_failed_nodes_managed

    set_status_failed_nodes_managed:
      publish:
        status: FAILED
        message: <% task(set_nodes_managed).result %>
      on-success: send_message

    set_nodes_available:
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: Nodes set to available.
      on-success: send_message
      on-error: set_status_failed_nodes_available

    set_status_failed_nodes_available:
      publish:
        status: FAILED
        message: <% task(set_nodes_available).result %>
      on-success: send_message

    # Post the result, including the list of registered nodes.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.register_or_update
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.registered_nodes or [] %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Moves each node to 'available', runs cell_v2 host discovery, then powers
# the nodes off. Any failing step records FAILED before the final message.
provide:
  description: Take a list of nodes and move them to "available"
  input:
    - node_uuids
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    set_nodes_available:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_node_state
      # NOTE(review): set_node_state, as declared in this workbook, lists
      # only node_uuid, state_action and target_state as inputs, yet
      # queue_name is passed as well - confirm the engine accepts it.
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        state_action: 'provide'
        target_state: 'available'
      on-success: cell_v2_discover_hosts
      on-error: set_status_failed_nodes_available

    set_status_failed_nodes_available:
      publish:
        status: FAILED
        message: <% task(set_nodes_available).result %>
      on-success: send_message

    # Run the discovery sub-workflow, retried up to 30 times at 30 second
    # intervals and bounded by the task timeout.
    cell_v2_discover_hosts:
      workflow: tripleo.baremetal.v1.cellv2_discovery
      input:
        node_uuids: <% $.node_uuids %>
        queue_name: <% $.queue_name %>
      timeout: 900  # 15 minutes
      retry:
        count: 30
        delay: 30
      on-success: try_power_off
      on-error: cell_v2_discover_hosts_failed

    cell_v2_discover_hosts_failed:
      publish:
        status: FAILED
        message: <% task(cell_v2_discover_hosts).result %>
      on-success: send_message

    try_power_off:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_power_state
      # NOTE(review): set_power_state declares no queue_name input either;
      # same question as for set_nodes_available above.
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        state_action: 'off'
        target_state: 'power off'
      publish:
        status: SUCCESS
        message: 'Successfully set nodes state to available.'
      on-success: send_message
      on-error: power_off_failed

    power_off_failed:
      publish:
        status: FAILED
        message: <% task(try_power_off).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.provide
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Lists the unassociated, non-maintenance nodes, keeps the ones in the
# 'manageable' provision state and hands them to the provide workflow. The
# outcome, success or failure, is always posted to the queue.
provide_manageable_nodes:
  description: Provide all nodes in a 'manageable' state.
  input:
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: provide_manageable
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    provide_manageable:
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
      on-success: send_message
      # Without an error route a failure of the nested workflow would end
      # this workflow before any message of its own type is posted.
      on-error: set_status_failed_provide_manageable

    # Record the nested workflow failure so send_message reports it.
    set_status_failed_provide_manageable:
      publish:
        status: FAILED
        message: <% task(provide_manageable).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.provide_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Runs set_node_state with the 'manage' action for every given node and
# posts the outcome to the queue.
manage:
  description: Set a list of nodes to 'manageable' state
  input:
    - node_uuids
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    set_nodes_manageable:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_node_state
      input:
        node_uuid: <% $.uuid %>
        state_action: 'manage'
        target_state: 'manageable'
      on-success: send_message
      on-error: set_status_failed_nodes_manageable

    # Record the failure details so send_message reports them.
    set_status_failed_nodes_manageable:
      publish:
        status: FAILED
        message: <% task(set_nodes_manageable).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.manage
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Introspects a single node: starts introspection, waits for it to finish
# and posts a per-node message carrying the status and any error.
_introspect:
  description: >
    An internal workflow. The tripleo.baremetal.v1.introspect workflow
    should be used for introspection.
  input:
    - node_uuid
    - timeout
    - queue_name
  output:
    result: <% task(start_introspection).result %>
  tags:
    - tripleo-common-managed
  tasks:
    start_introspection:
      action: baremetal_introspection.introspect
      input:
        uuid: <% $.node_uuid %>
      on-success: wait_for_introspection_to_finish
      on-error: set_status_failed_start_introspection

    # Introspection could not even be started for this node.
    set_status_failed_start_introspection:
      publish:
        status: FAILED
        message: <% task(start_introspection).result %>
        introspected_nodes: []
      on-success: send_message

    wait_for_introspection_to_finish:
      action: baremetal_introspection.wait_for_finish
      input:
        uuids: <% [$.node_uuid] %>
        # The interval is 10 seconds, so divide to make the overall timeout
        # in seconds correct.
        max_retries: <% $.timeout / 10 %>
        retry_interval: 10
      publish:
        introspected_node: <% task().result.values().first() %>
        # A finished node that reports an error still counts as FAILED.
        status: <% bool(task().result.values().first().error) and "FAILED" or "SUCCESS" %>
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      on-success: wait_for_introspection_to_finish_success
      on-error: wait_for_introspection_to_finish_error

    # Build the summary message from the published node data.
    wait_for_introspection_to_finish_success:
      publish:
        message: <% "Introspection of node {0} completed. Status:{1}. Errors:{2}".format($.introspected_node.uuid, $.status, $.introspected_node.error) %>
      on-success: send_message

    # Replaces the message published on error with a timeout notice.
    wait_for_introspection_to_finish_error:
      publish:
        message: <% "Introspection of node {0} timed out.".format($.node_uuid) %>
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1._introspect
            payload:
              status: <% $.status %>
              message: <% $.message %>
              introspected_node: <% $.get('introspected_node') %>
              node_uuid: <% $.node_uuid %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Introspects a list of nodes through the _introspect workflow, optionally
# after running the 'pre-introspection' validations. Nodes that fail are
# retried until max_retry_attempts is exceeded; progress and the final
# outcome are posted to the queue.
introspect:
  description: >
    Take a list of nodes and move them through introspection.
    By default each node will attempt introspection up to 3 times (two
    retries plus the initial attemp) if it fails. This behaviour can be
    modified by changing the max_retry_attempts input.
    The workflow will assume the node has timed out after 20 minutes (1200
    seconds). This can be changed by passing the node_timeout input in
    seconds.
  input:
    - node_uuids
    - run_validations: false
    - queue_name: tripleo
    - concurrency: 20
    - max_retry_attempts: 2
    - node_timeout: 1200
  tags:
    - tripleo-common-managed
  # Any task error without its own on-error route ends up here.
  task-defaults:
    on-error: unhandled_error
  tasks:
    initialize:
      publish:
        introspection_attempt: 1
      on-complete:
        - run_validations: <% $.run_validations %>
        - introspect_nodes: <% not $.run_validations %>

    run_validations:
      workflow: tripleo.validations.v1.run_groups
      input:
        group_names:
          - 'pre-introspection'
        queue_name: <% $.queue_name %>
      on-success: introspect_nodes
      on-error: set_validations_failed

    set_validations_failed:
      publish:
        status: FAILED
        message: <% task(run_validations).result %>
      on-success: send_message

    introspect_nodes:
      with-items: uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: _introspect
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        timeout: <% $.node_timeout %>
      # on-error is triggered if one or more nodes failed introspection. We
      # still go to get_introspection_status as it will collect the result
      # for each node. Unless we hit the retry limit.
      on-error:
        - get_introspection_status: <% $.introspection_attempt <= $.max_retry_attempts %>
        - max_retry_attempts_reached: <% $.introspection_attempt > $.max_retry_attempts %>
      on-success: get_introspection_status

    get_introspection_status:
      with-items: uuid in <% $.node_uuids %>
      action: baremetal_introspection.get_status
      input:
        uuid: <% $.uuid %>
      publish:
        introspected_nodes: <% task().result.toDict($.uuid, $) %>
        # Currently there is no way for us to ignore user introspection
        # aborts. This means we will retry aborted nodes until the Ironic API
        # gives us more details (error code or a boolean to show aborts etc.)
        # If a node hasn't finished, we consider it to be failed.
        # TODO(d0ugal): When possible, don't retry introspection of nodes
        # that a user manually aborted.
        failed_introspection: <% task().result.where($.finished = true and $.error != null).select($.uuid) + task().result.where($.finished = false).select($.uuid) %>
      publish-on-error:
        # If a node fails to start introspection, getting the status can fail.
        # When that happens, the result is a string and the nodes need to be
        # filtered out.
        introspected_nodes: <% task().result.where(isDict($)).toDict($.uuid, $) %>
        # If there was an error, the exception string we get doesn't give us
        # the UUID. So we use a set difference to find the UUIDs missing in
        # the results. These are then added to the failed nodes.
        # Every term is reduced to UUIDs before being merged, because
        # retry_failed_nodes feeds this value back in as node_uuids.
        failed_introspection: <% ($.node_uuids.toSet() - task().result.where(isDict($)).select($.uuid).toSet()) + task().result.where(isDict($)).where($.finished = true and $.error != null).select($.uuid).toSet() + task().result.where(isDict($)).where($.finished = false).select($.uuid).toSet() %>
      on-error: increase_attempt_counter
      on-success:
        - successful_introspection: <% $.failed_introspection.len() = 0 %>
        - increase_attempt_counter: <% $.failed_introspection.len() > 0 %>

    increase_attempt_counter:
      publish:
        introspection_attempt: <% $.introspection_attempt + 1 %>
      on-complete: retry_failed_nodes

    # Reports the retry and restarts introspection for the failed nodes only.
    retry_failed_nodes:
      publish:
        status: RUNNING
        message: <% 'Retrying {0} nodes that failed introspection. Attempt {1} of {2} '.format($.failed_introspection.len(), $.introspection_attempt, $.max_retry_attempts + 1) %>
        # We are about to retry, update the tracking stats.
        node_uuids: <% $.failed_introspection %>
      on-success:
        - send_message
        - introspect_nodes

    max_retry_attempts_reached:
      publish:
        status: FAILED
        message: <% 'Retry limit reached with {0} nodes still failing introspection'.format($.failed_introspection.len()) %>
      on-complete: send_message

    successful_introspection:
      publish:
        status: SUCCESS
        message: 'Successfully introspected nodes.'
      on-complete: send_message

    unhandled_error:
      publish:
        status: FAILED
        message: "Unhandled workflow error"
      on-complete: send_message

    # Post the current status together with the per-node results.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.introspect
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              introspected_nodes: <% $.get('introspected_nodes', []) %>
              failed_introspection: <% $.get('failed_introspection', []) %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Lists the unassociated, non-maintenance nodes, keeps the ones in the
# 'manageable' provision state and runs the introspect workflow on them.
introspect_manageable_nodes:
  description: Introspect all nodes in a 'manageable' state.
  input:
    - run_validations: false
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: introspect_manageable
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    introspect_manageable:
      workflow: tripleo.baremetal.v1.introspect
      input:
        node_uuids: <% $.managed_nodes %>
        run_validations: <% $.run_validations %>
        queue_name: <% $.queue_name %>
      publish:
        introspected_nodes: <% task().result.introspected_nodes %>
        message: 'Nodes introspected successfully.'
      on-success: send_message
      on-error: set_status_introspect_manageable

    # Record the failure of the nested introspect workflow.
    set_status_introspect_manageable:
      publish:
        status: FAILED
        message: <% task(introspect_manageable).result %>
        introspected_nodes: []
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.introspect_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              introspected_nodes: <% $.get('introspected_nodes', []) %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Applies the boot configuration and then the root device configuration to
# every given node, and posts the outcome to the queue.
configure:
  description: Take a list of manageable nodes and update their boot configuration.
  input:
    - node_uuids
    - queue_name: tripleo
    - kernel_name: bm-deploy-kernel
    - ramdisk_name: bm-deploy-ramdisk
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: false
  tags:
    - tripleo-common-managed
  tasks:
    configure_boot:
      with-items: node_uuid in <% $.node_uuids %>
      action: tripleo.baremetal.configure_boot
      input:
        node_uuid: <% $.node_uuid %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
      on-success: configure_root_device
      on-error: set_status_failed_configure_boot

    configure_root_device:
      with-items: node_uuid in <% $.node_uuids %>
      action: tripleo.baremetal.configure_root_device
      input:
        node_uuid: <% $.node_uuid %>
        root_device: <% $.root_device %>
        minimum_size: <% $.root_device_minimum_size %>
        overwrite: <% $.overwrite_root_device_hints %>
      publish:
        status: SUCCESS
        message: 'Successfully configured the nodes.'
      on-success: send_message
      on-error: set_status_failed_configure_root_device

    set_status_failed_configure_boot:
      publish:
        status: FAILED
        message: <% task(configure_boot).result %>
      on-success: send_message

    set_status_failed_configure_root_device:
      publish:
        status: FAILED
        message: <% task(configure_root_device).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.configure
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Lists the unassociated, non-maintenance nodes, keeps the ones in the
# 'manageable' provision state and runs the configure workflow on them with
# the options received as input.
configure_manageable_nodes:
  description: Update the boot configuration of all nodes in 'manageable' state.
  input:
    - queue_name: tripleo
    - kernel_name: 'bm-deploy-kernel'
    - ramdisk_name: 'bm-deploy-ramdisk'
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: false
  tags:
    - tripleo-common-managed
  tasks:
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: configure_manageable
      on-error: set_status_failed_get_manageable_nodes

    configure_manageable:
      workflow: tripleo.baremetal.v1.configure
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        root_device: <% $.root_device %>
        root_device_minimum_size: <% $.root_device_minimum_size %>
        overwrite_root_device_hints: <% $.overwrite_root_device_hints %>
      publish:
        message: 'Manageable nodes configured successfully.'
      on-success: send_message
      on-error: set_status_failed_configure_manageable

    set_status_failed_configure_manageable:
      publish:
        status: FAILED
        message: <% task(configure_manageable).result %>
      on-success: send_message

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.configure_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Sets the 'profile' capability of one node to the given role (null by
# default) and posts the outcome to the queue.
tag_node:
  description: Tag a node with a role
  input:
    - node_uuid
    - role: null
    - queue_name: tripleo
  # A failing task goes straight to send_message without publishing a
  # status, which is why the message below defaults to FAILED.
  task-defaults:
    on-error: send_message
  tags:
    - tripleo-common-managed
  tasks:
    update_node:
      action: tripleo.baremetal.update_node_capability
      input:
        node_uuid: <% $.node_uuid %>
        capability: 'profile'
        value: <% $.role %>
      publish:
        message: <% task().result %>
        status: SUCCESS
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.tag_node
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Tags one list of nodes with a role, runs tag_node without a role for a
# second list, then updates the role parameters of the plan.
tag_nodes:
  description: Runs the tag_node workflow in a loop
  input:
    - tag_node_uuids
    - untag_node_uuids
    - role
    - plan: overcloud
    - queue_name: tripleo
  # A failing task goes straight to send_message without publishing a
  # status, which is why the message below defaults to FAILED.
  task-defaults:
    on-error: send_message
  tags:
    - tripleo-common-managed
  tasks:
    tag_nodes:
      with-items: node_uuid in <% $.tag_node_uuids %>
      concurrency: 1
      workflow: tripleo.baremetal.v1.tag_node
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
        role: <% $.role %>
      on-success: untag_nodes

    # No role is passed, so tag_node falls back to its default (null).
    untag_nodes:
      with-items: node_uuid in <% $.untag_node_uuids %>
      concurrency: 1
      workflow: tripleo.baremetal.v1.tag_node
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
      on-success: update_role_parameters

    update_role_parameters:
      action: tripleo.parameters.update_role
      input:
        role: <% $.role %>
        container: <% $.plan %>
      publish:
        message: <% task().result %>
        status: SUCCESS
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.tag_nodes
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Fetches the available nodes, resolves the profile of each one and reports
# the UUIDs of the nodes whose profile equals the requested one.
nodes_with_profile:
  description: Find nodes with a specific profile
  input:
    - profile
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    get_available_nodes:
      action: ironic.node_list maintenance=false provision_state='available' detail=true
      on-success: get_matching_nodes
      on-error: set_status_failed_get_available_nodes

    get_matching_nodes:
      with-items: node in <% task(get_available_nodes).result %>
      action: tripleo.baremetal.get_profile
      input:
        node: <% $.node %>
      publish:
        # The requested profile is bound to a variable first because '$'
        # refers to the current item inside where().
        matching_nodes: <% let(input_profile_name => $.profile) -> task().result.where($.profile = $input_profile_name).uuid %>
      on-success: send_message
      on-error: set_status_failed_get_matching_nodes

    set_status_failed_get_available_nodes:
      publish:
        status: FAILED
        message: <% task(get_available_nodes).result %>
      on-success: send_message

    set_status_failed_get_matching_nodes:
      publish:
        status: FAILED
        message: <% task(get_matching_nodes).result %>
      on-success: send_message

    # Post the result, including the matching node UUIDs.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.nodes_with_profile
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              matching_nodes: <% $.matching_nodes or [] %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Stores the target RAID configuration on every given node and then applies
# it through manual cleaning (delete the existing configuration, create the
# new one). The outcome is posted to the queue.
create_raid_configuration:
  description: Create and apply RAID configuration for given nodes
  input:
    - node_uuids
    - configuration
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    set_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      action: ironic.node_set_target_raid_config
      input:
        node_ident: <% $.node_uuid %>
        target_raid_config: <% $.configuration %>
      on-success: apply_configuration
      on-error: set_configuration_failed

    set_configuration_failed:
      publish:
        status: FAILED
        message: <% task(set_configuration).result %>
      on-success: send_message

    apply_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        # Forward the caller's queue so the nested workflow reports to the
        # same queue as this one instead of its own default.
        queue_name: <% $.queue_name %>
        clean_steps:
          - interface: raid
            step: delete_configuration
          - interface: raid
            step: create_configuration
        timeout: 1800  # building RAID should be faster than general cleaning
        retry_count: 180
        retry_delay: 10
      publish:
        message: <% task().result %>
        status: SUCCESS
      on-success: send_message
      on-error: apply_configuration_failed

    apply_configuration_failed:
      publish:
        status: FAILED
        message: <% task(apply_configuration).result %>
      on-success: send_message

    # Post the result; status defaults to FAILED when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.create_raid_configuration
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Runs the cell_v2 host discovery action and then looks up a hypervisor
# entry for every given node, posting the outcome to the queue.
cellv2_discovery:
  description: Run cell_v2 host discovery
  input:
    - node_uuids
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    cell_v2_discover_hosts:
      action: tripleo.baremetal.cell_v2_discover_hosts
      on-success: wait_for_nova_resources
      on-error: cell_v2_discover_hosts_failed

    cell_v2_discover_hosts_failed:
      publish:
        status: FAILED
        message: <% task(cell_v2_discover_hosts).result %>
      on-success: send_message

    # The node UUID is used as the hypervisor hostname for the lookup.
    wait_for_nova_resources:
      with-items: node_uuid in <% $.node_uuids %>
      action: nova.hypervisors_find
      input:
        hypervisor_hostname: <% $.node_uuid %>
      on-success: send_message
      on-error: wait_for_nova_resources_failed

    wait_for_nova_resources_failed:
      publish:
        status: FAILED
        message: <% task(wait_for_nova_resources).result %>
      on-success: send_message

    # Post the result; status defaults to SUCCESS when nothing published it.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.cellv2_discovery
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Lists the known nodes, derives candidate nodes from the given addresses,
# ports and credentials, probes every candidate and reports the non-null
# probe results as nodes_json.
discover_nodes:
  description: Run nodes discovery over the given IP range
  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        limit: 0
      publish:
        existing_nodes: <% task().result %>
      on-success: get_candidate_nodes
      on-error: get_all_nodes_failed

    get_all_nodes_failed:
      publish:
        status: FAILED
        message: <% task(get_all_nodes).result %>
      on-success: send_message

    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      publish:
        candidates: <% task().result %>
      on-success: probe_nodes
      on-error: get_candidate_nodes_failed

    get_candidate_nodes_failed:
      publish:
        status: FAILED
        message: <% task(get_candidate_nodes).result %>
      on-success: send_message

    probe_nodes:
      with-items:
        - node in <% $.candidates %>
      action: tripleo.baremetal.probe_node
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      publish:
        # Null probe results are dropped from the reported list.
        nodes_json: <% task().result.where($ != null) %>
      on-success: send_message
      on-error: probe_nodes_failed

    probe_nodes_failed:
      publish:
        status: FAILED
        message: <% task(probe_nodes).result %>
      on-success: send_message

    # Post the result, including the discovered nodes.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              nodes_json: <% $.get('nodes_json', []) %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Runs the discover_nodes workflow and feeds its nodes_json into the
# register_or_update workflow, then reports the registered nodes.
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes
  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
    - queue_name: tripleo
  tags:
    - tripleo-common-managed
  tasks:
    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      publish:
        nodes_json: <% task().result.nodes_json %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed

    discover_nodes_failed:
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>
      on-success: send_message

    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
        # Forward the caller's queue, as the discover_nodes task does, so
        # both nested workflows report to the same queue.
        queue_name: <% $.queue_name %>
      publish:
        registered_nodes: <% task().result.registered_nodes %>
      on-success: send_message
      on-error: enroll_nodes_failed

    enroll_nodes_failed:
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>
      on-success: send_message

    # Post the result, including the registered nodes.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_and_enroll_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.get('registered_nodes', []) %>
      # Fail the workflow after reporting when a failure was recorded.
      on-success:
        - fail: <% $.get('status') = "FAILED" %>