7a18b486a1
Two tasks in the introspect workflow have incorrectly been sending "ERROR" statuses when they should have sent "FAILED". This then meant the workflow appeared to finish without errors (or failures). This is primarily a problem for the introspect_manageable_nodes workflow which then can't detect errors and reports that everything was successful. Change-Id: I34a91dd14bb19775ad62271def6ecb66398c84db Closes-Bug: #1733303
1240 lines
38 KiB
YAML
1240 lines
38 KiB
YAML
---
# Workbook header: DSL version, workbook name and a short description.
version: '2.0'
name: tripleo.baremetal.v1
description: TripleO Baremetal Workflows

# All workflows of this workbook are entries of the mapping below.
workflows:
|
|
|
|
# Request a provision-state transition for one node, then poll the node
# until its provision_state equals target_state.
#
# Inputs:
#   node_uuid    - node to act on
#   state_action - value passed as "state" to node_set_provision_state
#   target_state - provision_state value that ends the polling
set_node_state:
  tags:
    - tripleo-common-managed

  input:
    - node_uuid
    - state_action
    - target_state

  tasks:

    # Kick off the transition.
    set_provision_state:
      action: ironic.node_set_provision_state
      input:
        node_uuid: <% $.node_uuid %>
        state: <% $.state_action %>
      on-success: wait_for_provision_state

    # Re-read the node (up to 400 retries, 3 seconds apart, 20 minute cap)
    # for as long as it has not reached target_state.
    wait_for_provision_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
      timeout: 1200  # 20 minutes
      retry:
        count: 400
        delay: 3
        continue-on: <% task(wait_for_provision_state).result.provision_state != $.target_state %>
|
|
|
|
# Request a power-state change for one node, then poll the node until its
# power_state equals target_state.
#
# Inputs:
#   node_uuid    - node to act on
#   state_action - value passed as "state" to node_set_power_state
#   target_state - power_state value that ends the polling
set_power_state:
  input:
    - node_uuid
    - state_action
    - target_state

  tags:
    - tripleo-common-managed

  tasks:

    # This task was previously called "set_provision_state" although it
    # calls node_set_power_state; the name now matches what it does.
    set_power_state:
      action: ironic.node_set_power_state node_id=<% $.node_uuid %> state=<% $.state_action %>
      on-success: wait_for_power_state

    # Re-read the node (up to 20 retries, 6 seconds apart, 2 minute cap)
    # for as long as it has not reached target_state.
    wait_for_power_state:
      action: ironic.node_get node_id=<% $.node_uuid %>
      timeout: 120  # 2 minutes
      retry:
        delay: 6
        count: 20
        continue-on: <% task(wait_for_power_state).result.power_state != $.target_state %>
|
|
|
|
# Run the given clean steps on one node and wait until the node is back in
# the 'manageable' provision state. The outcome is posted to the Zaqar queue.
#
# Inputs:
#   node_uuid   - node to clean
#   clean_steps - passed as "cleansteps" to node_set_provision_state
#   timeout     - overall cap (seconds) for the wait task
#   retry_delay - seconds between polls
#   retry_count - maximum number of polls
#   queue_name  - Zaqar queue that receives the result message
manual_cleaning:
  input:
    - node_uuid
    - clean_steps
    - timeout: 7200  # 2 hours (cleaning can take really long)
    - retry_delay: 10
    - retry_count: 720
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    set_provision_state:
      action: ironic.node_set_provision_state node_uuid=<% $.node_uuid %> state='clean' cleansteps=<% $.clean_steps %>
      on-success: wait_for_provision_state
      on-error: set_provision_state_failed

    set_provision_state_failed:
      publish:
        status: FAILED
        message: <% task(set_provision_state).result %>
      on-success: send_message

    # Poll until the node reports 'manageable' again.
    wait_for_provision_state:
      action: ironic.node_get node_id=<% $.node_uuid %>
      timeout: <% $.timeout %>
      retry:
        delay: <% $.retry_delay %>
        count: <% $.retry_count %>
        continue-on: <% task().result.provision_state != 'manageable' %>
      on-success: send_message
      # Without this transition a timeout or exhausted retries would end the
      # workflow without any FAILED message being posted to the queue.
      on-error: wait_for_provision_state_failed

    wait_for_provision_state_failed:
      publish:
        status: FAILED
        message: <% task(wait_for_provision_state).result %>
      on-success: send_message

    # Post the result; the workflow is failed afterwards when status is FAILED.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.manual_cleaning
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Validate a nodes JSON document and post the outcome to the Zaqar queue.
#
# Inputs:
#   nodes_json - document handed to tripleo.baremetal.validate_nodes
#   queue_name - Zaqar queue that receives the result message
validate_nodes:
  description: Validate nodes JSON

  tags:
    - tripleo-common-managed

  input:
    - nodes_json
    - queue_name: tripleo

  tasks:

    validate_nodes:
      action: tripleo.baremetal.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
      on-error: validation_failed
      on-success: send_message

    # Record the action result as the failure message.
    validation_failed:
      publish:
        message: <% task(validate_nodes).result %>
        status: FAILED
      on-success: send_message

    # Status defaults to SUCCESS when no task published one.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.validate_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Validate and register (or update) nodes, then optionally move the newly
# enrolled ones to 'manageable' and 'available' depending on initial_state.
#
# Inputs:
#   nodes_json            - node definitions to register or update
#   remove                - forwarded to register_or_update_nodes
#   queue_name            - Zaqar queue that receives the result message
#   kernel_name, ramdisk_name, instance_boot_option
#                         - forwarded to register_or_update_nodes
#   initial_state         - 'enroll', 'manageable' or 'available' decide how
#                           far the new nodes are moved
register_or_update:
  description: Take nodes JSON and create nodes in a "manageable" state

  tags:
    - tripleo-common-managed

  input:
    - nodes_json
    - remove: false
    - queue_name: tripleo
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable

  tasks:

    validate_input:
      workflow: tripleo.baremetal.v1.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
        queue_name: <% $.queue_name %>
      on-error: validation_failed
      on-success: register_or_update_nodes

    validation_failed:
      publish:
        registered_nodes: []
        message: <% task(validate_input).result %>
        status: FAILED
      on-success: send_message

    # new_nodes keeps only the results still in the 'enroll' state.
    register_or_update_nodes:
      action: tripleo.baremetal.register_or_update_nodes
      input:
        nodes_json: <% $.nodes_json %>
        remove: <% $.remove %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
      publish:
        registered_nodes: <% task().result %>
        new_nodes: <% task().result.where($.provision_state = 'enroll') %>
      on-error: set_status_failed_register_or_update_nodes
      on-success:
        - set_nodes_managed: <% $.initial_state != "enroll" %>
        - send_message: <% $.initial_state = "enroll" %>

    set_status_failed_register_or_update_nodes:
      publish:
        registered_nodes: []
        message: <% task(register_or_update_nodes).result %>
        status: FAILED
      on-success: send_message

    # Only continues to set_nodes_available when 'available' was requested.
    set_nodes_managed:
      workflow: tripleo.baremetal.v1.manage
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: Nodes set to managed.
      on-error: set_status_failed_nodes_managed
      on-success:
        - set_nodes_available: <% $.initial_state = "available" %>
        - send_message: <% $.initial_state != "available" %>

    set_status_failed_nodes_managed:
      publish:
        message: <% task(set_nodes_managed).result %>
        status: FAILED
      on-success: send_message

    set_nodes_available:
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: Nodes set to available.
      on-error: set_status_failed_nodes_available
      on-success: send_message

    set_status_failed_nodes_available:
      publish:
        message: <% task(set_nodes_available).result %>
        status: FAILED
      on-success: send_message

    # Post the result including the list of registered nodes.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.register_or_update
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.registered_nodes or [] %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Move each listed node to 'available', run cell_v2 host discovery, then
# try to power the nodes off. The outcome is posted to the Zaqar queue.
#
# Inputs:
#   node_uuids - nodes to provide
#   queue_name - Zaqar queue that receives the result message
provide:
  description: Take a list of nodes and move them to "available"

  tags:
    - tripleo-common-managed

  input:
    - node_uuids
    - queue_name: tripleo

  tasks:

    # One set_node_state run per node.
    set_nodes_available:
      workflow: tripleo.baremetal.v1.set_node_state
      with-items: uuid in <% $.node_uuids %>
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        state_action: 'provide'
        target_state: 'available'
      on-error: set_status_failed_nodes_available
      on-success: cell_v2_discover_hosts

    set_status_failed_nodes_available:
      publish:
        message: <% task(set_nodes_available).result %>
        status: FAILED
      on-success: send_message

    # Retried up to 30 times, 30 seconds apart, capped at 15 minutes.
    cell_v2_discover_hosts:
      workflow: tripleo.baremetal.v1.cellv2_discovery
      input:
        node_uuids: <% $.node_uuids %>
        queue_name: <% $.queue_name %>
      timeout: 900  # 15 minutes
      retry:
        count: 30
        delay: 30
      on-error: cell_v2_discover_hosts_failed
      on-success: try_power_off

    cell_v2_discover_hosts_failed:
      publish:
        message: <% task(cell_v2_discover_hosts).result %>
        status: FAILED
      on-success: send_message

    # One set_power_state run per node.
    try_power_off:
      workflow: tripleo.baremetal.v1.set_power_state
      with-items: uuid in <% $.node_uuids %>
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        state_action: 'off'
        target_state: 'power off'
      publish:
        status: SUCCESS
        message: 'Successfully set nodes state to available.'
      on-error: power_off_failed
      on-success: send_message

    power_off_failed:
      publish:
        message: <% task(try_power_off).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.provide
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# List the nodes that are in the 'manageable' provision state and hand them
# to the provide workflow. The outcome is posted to the Zaqar queue.
#
# Inputs:
#   queue_name - Zaqar queue that receives the result message
provide_manageable_nodes:
  description: Provide all nodes in a 'manageable' state.

  input:
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Keep only the UUIDs of nodes whose provision_state is 'manageable'.
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: provide_manageable
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    provide_manageable:
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
      on-success: send_message
      # A failing provide sub-workflow previously ended this workflow
      # without a FAILED message; handle it like the sibling
      # *_manageable_nodes workflows do.
      on-error: set_status_failed_provide_manageable

    set_status_failed_provide_manageable:
      publish:
        status: FAILED
        message: <% task(provide_manageable).result %>
      on-success: send_message

    # Post the result; the workflow is failed afterwards when status is FAILED.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.provide_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Run set_node_state with the 'manage' action for every listed node and
# post the outcome to the Zaqar queue.
#
# Inputs:
#   node_uuids - nodes to move to 'manageable'
#   queue_name - Zaqar queue that receives the result message
manage:
  description: Set a list of nodes to 'manageable' state

  tags:
    - tripleo-common-managed

  input:
    - node_uuids
    - queue_name: tripleo

  tasks:

    # One set_node_state run per node.
    set_nodes_manageable:
      workflow: tripleo.baremetal.v1.set_node_state
      with-items: uuid in <% $.node_uuids %>
      input:
        node_uuid: <% $.uuid %>
        state_action: 'manage'
        target_state: 'manageable'
      on-error: set_status_failed_nodes_manageable
      on-success: send_message

    set_status_failed_nodes_manageable:
      publish:
        message: <% task(set_nodes_manageable).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.manage
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Introspect a single node: start introspection, wait for it to finish and
# post a per-node result message to the Zaqar queue.
#
# Inputs:
#   node_uuid  - node to introspect
#   timeout    - overall wait time in seconds (divided by the 10 second
#                polling interval to get max_retries)
#   queue_name - Zaqar queue that receives the result message
# Output:
#   result - result of the start_introspection task
_introspect:
  description: >
    An internal workflow. The tripleo.baremetal.v1.introspect workflow
    should be used for introspection.

  tags:
    - tripleo-common-managed

  input:
    - node_uuid
    - timeout
    - queue_name

  output:
    result: <% task(start_introspection).result %>

  tasks:
    start_introspection:
      action: baremetal_introspection.introspect
      input:
        uuid: <% $.node_uuid %>
      on-error: set_status_failed_start_introspection
      on-success: wait_for_introspection_to_finish

    set_status_failed_start_introspection:
      publish:
        introspected_nodes: []
        message: <% task(start_introspection).result %>
        status: FAILED
      on-success: send_message

    wait_for_introspection_to_finish:
      action: baremetal_introspection.wait_for_finish
      input:
        uuids: <% [$.node_uuid] %>
        # Polling happens every 10 seconds, so the retry budget is the
        # requested timeout (in seconds) divided by 10.
        retry_interval: 10
        max_retries: <% $.timeout / 10 %>
      # status is FAILED when the single result entry carries an error.
      publish:
        introspected_node: <% task().result.values().first() %>
        status: <% bool(task().result.values().first().error) and "FAILED" or "SUCCESS" %>
      publish-on-error:
        message: <% task().result %>
        status: FAILED
      on-error: wait_for_introspection_to_finish_error
      on-success: wait_for_introspection_to_finish_success

    wait_for_introspection_to_finish_success:
      publish:
        message: <% "Introspection of node {0} completed. Status:{1}. Errors:{2}".format($.introspected_node.uuid, $.status, $.introspected_node.error) %>
      on-success: send_message

    # Replaces the message published on error with a timeout notice.
    wait_for_introspection_to_finish_error:
      publish:
        message: <% "Introspection of node {0} timed out.".format($.node_uuid) %>
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1._introspect
            payload:
              status: <% $.status %>
              message: <% $.message %>
              introspected_node: <% $.get('introspected_node') %>
              node_uuid: <% $.node_uuid %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Introspect a list of nodes, retrying the ones that fail until the retry
# limit is reached, and post progress/result messages to the Zaqar queue.
#
# Inputs:
#   node_uuids         - nodes to introspect
#   run_validations    - run the 'pre-introspection' validation group first
#   queue_name         - Zaqar queue that receives the messages
#   concurrency        - with-items concurrency of the per-node workflow
#   max_retry_attempts - retries allowed after the initial attempt
#   node_timeout       - per-node timeout (seconds) passed to _introspect
introspect:
  description: >
    Take a list of nodes and move them through introspection.

    By default each node will attempt introspection up to 3 times (two
    retries plus the initial attempt) if it fails. This behaviour can be
    modified by changing the max_retry_attempts input.

    The workflow will assume the node has timed out after 20 minutes (1200
    seconds). This can be changed by passing the node_timeout input in
    seconds.

  input:
    - node_uuids
    - run_validations: False
    - queue_name: tripleo
    - concurrency: 20
    - max_retry_attempts: 2
    - node_timeout: 1200

  tags:
    - tripleo-common-managed

  # Any task error without an explicit on-error ends in unhandled_error.
  task-defaults:
    on-error: unhandled_error

  tasks:
    initialize:
      publish:
        introspection_attempt: 1
      on-complete:
        - run_validations: <% $.run_validations %>
        - introspect_nodes: <% not $.run_validations %>

    run_validations:
      workflow: tripleo.validations.v1.run_groups
      input:
        group_names:
          - 'pre-introspection'
        queue_name: <% $.queue_name %>
      on-success: introspect_nodes
      on-error: set_validations_failed

    set_validations_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(run_validations).result %>

    introspect_nodes:
      with-items: uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: _introspect
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        timeout: <% $.node_timeout %>
      # on-error is triggered if one or more nodes failed introspection. We
      # still go to get_introspection_status as it will collect the result
      # for each node. Unless we hit the retry limit.
      on-error:
        - get_introspection_status: <% $.introspection_attempt <= $.max_retry_attempts %>
        - max_retry_attempts_reached: <% $.introspection_attempt > $.max_retry_attempts %>
      on-success: get_introspection_status

    get_introspection_status:
      with-items: uuid in <% $.node_uuids %>
      action: baremetal_introspection.get_status
      input:
        uuid: <% $.uuid %>
      publish:
        introspected_nodes: <% task().result.toDict($.uuid, $) %>
        # Currently there is no way for us to ignore user introspection
        # aborts. This means we will retry aborted nodes until the Ironic API
        # gives us more details (error code or a boolean to show aborts etc.)
        # If a node hasn't finished, we consider it to be failed.
        # TODO(d0ugal): When possible, don't retry introspection of nodes
        #               that a user manually aborted.
        failed_introspection: <% task().result.where($.finished = true and $.error != null).select($.uuid) + task().result.where($.finished = false).select($.uuid) %>
      publish-on-error:
        # If a node fails to start introspection, getting the status can fail.
        # When that happens, the result is a string and the nodes need to be
        # filtered out.
        introspected_nodes: <% task().result.where(isDict($)).toDict($.uuid, $) %>
        # If there was an error, the exception string we get doesn't give us
        # the UUID. So we use a set difference to find the UUIDs missing in
        # the results. These are then added to the failed nodes.
        # Every term selects $.uuid so the value holds UUIDs only, matching
        # the publish branch above; it becomes node_uuids for the next retry.
        failed_introspection: <% ($.node_uuids.toSet() - task().result.where(isDict($)).select($.uuid).toSet()) + task().result.where(isDict($)).where($.finished = true and $.error != null).select($.uuid).toSet() + task().result.where(isDict($)).where($.finished = false).select($.uuid).toSet() %>
      on-error: increase_attempt_counter
      on-success:
        - successful_introspection: <% $.failed_introspection.len() = 0 %>
        - increase_attempt_counter: <% $.failed_introspection.len() > 0 %>

    increase_attempt_counter:
      publish:
        introspection_attempt: <% $.introspection_attempt + 1 %>
      on-complete: retry_failed_nodes

    # Announces the retry and restarts introspect_nodes with the failed set.
    retry_failed_nodes:
      publish:
        status: RUNNING
        message: <% 'Retrying {0} nodes that failed introspection. Attempt {1} of {2} '.format($.failed_introspection.len(), $.introspection_attempt, $.max_retry_attempts + 1) %>
        # We are about to retry, update the tracking stats.
        node_uuids: <% $.failed_introspection %>
      on-success:
        - send_message
        - introspect_nodes

    max_retry_attempts_reached:
      publish:
        status: FAILED
        message: <% 'Retry limit reached with {0} nodes still failing introspection'.format($.failed_introspection.len()) %>
      on-complete: send_message

    successful_introspection:
      publish:
        status: SUCCESS
        message: 'Successfully introspected nodes.'
      on-complete: send_message

    unhandled_error:
      publish:
        status: FAILED
        message: "Unhandled workflow error"
      on-complete: send_message

    # Post the current status; the workflow is failed afterwards when the
    # status is FAILED.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.introspect
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              introspected_nodes: <% $.get('introspected_nodes', []) %>
              failed_introspection: <% $.get('failed_introspection', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# List the nodes that are in the 'manageable' provision state and hand them
# to the introspect workflow. The outcome is posted to the Zaqar queue.
#
# Inputs:
#   run_validations - forwarded to the introspect workflow
#   queue_name      - Zaqar queue that receives the result message
introspect_manageable_nodes:
  description: Introspect all nodes in a 'manageable' state.

  tags:
    - tripleo-common-managed

  input:
    - run_validations: false
    - queue_name: tripleo

  tasks:

    # Keep only the UUIDs of nodes whose provision_state is 'manageable'.
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-error: set_status_failed_get_manageable_nodes
      on-success: introspect_manageable

    set_status_failed_get_manageable_nodes:
      publish:
        message: <% task(get_manageable_nodes).result %>
        status: FAILED
      on-success: send_message

    introspect_manageable:
      workflow: tripleo.baremetal.v1.introspect
      input:
        node_uuids: <% $.managed_nodes %>
        run_validations: <% $.run_validations %>
        queue_name: <% $.queue_name %>
      publish:
        introspected_nodes: <% task().result.introspected_nodes %>
        message: 'Nodes introspected successfully.'
      on-error: set_status_introspect_manageable
      on-success: send_message

    # Failure path of introspect_manageable.
    set_status_introspect_manageable:
      publish:
        introspected_nodes: []
        message: <% task(introspect_manageable).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.introspect_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              introspected_nodes: <% $.get('introspected_nodes', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# For every listed node, configure boot settings and then the root device.
# The outcome is posted to the Zaqar queue.
#
# Inputs:
#   node_uuids                  - nodes to configure
#   queue_name                  - Zaqar queue that receives the result message
#   kernel_name, ramdisk_name, instance_boot_option
#                               - forwarded to configure_boot
#   root_device, root_device_minimum_size, overwrite_root_device_hints
#                               - forwarded to configure_root_device
configure:
  description: Take a list of manageable nodes and update their boot configuration.

  tags:
    - tripleo-common-managed

  input:
    - node_uuids
    - queue_name: tripleo
    - kernel_name: bm-deploy-kernel
    - ramdisk_name: bm-deploy-ramdisk
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: false

  tasks:

    configure_boot:
      action: tripleo.baremetal.configure_boot node_uuid=<% $.node_uuid %> kernel_name=<% $.kernel_name %> ramdisk_name=<% $.ramdisk_name %> instance_boot_option=<% $.instance_boot_option %>
      with-items: node_uuid in <% $.node_uuids %>
      on-error: set_status_failed_configure_boot
      on-success: configure_root_device

    configure_root_device:
      action: tripleo.baremetal.configure_root_device node_uuid=<% $.node_uuid %> root_device=<% $.root_device %> minimum_size=<% $.root_device_minimum_size %> overwrite=<% $.overwrite_root_device_hints %>
      with-items: node_uuid in <% $.node_uuids %>
      publish:
        status: SUCCESS
        message: 'Successfully configured the nodes.'
      on-error: set_status_failed_configure_root_device
      on-success: send_message

    set_status_failed_configure_boot:
      publish:
        message: <% task(configure_boot).result %>
        status: FAILED
      on-success: send_message

    set_status_failed_configure_root_device:
      publish:
        message: <% task(configure_root_device).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.configure
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# List the nodes that are in the 'manageable' provision state and hand them
# to the configure workflow. The outcome is posted to the Zaqar queue.
#
# Inputs: queue_name plus the boot and root-device options, all of which
# are forwarded unchanged to the configure workflow.
configure_manageable_nodes:
  description: Update the boot configuration of all nodes in 'manageable' state.

  tags:
    - tripleo-common-managed

  input:
    - queue_name: tripleo
    - kernel_name: 'bm-deploy-kernel'
    - ramdisk_name: 'bm-deploy-ramdisk'
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: false

  tasks:

    # Keep only the UUIDs of nodes whose provision_state is 'manageable'.
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-error: set_status_failed_get_manageable_nodes
      on-success: configure_manageable

    configure_manageable:
      workflow: tripleo.baremetal.v1.configure
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        root_device: <% $.root_device %>
        root_device_minimum_size: <% $.root_device_minimum_size %>
        overwrite_root_device_hints: <% $.overwrite_root_device_hints %>
      publish:
        message: 'Manageable nodes configured successfully.'
      on-error: set_status_failed_configure_manageable
      on-success: send_message

    set_status_failed_configure_manageable:
      publish:
        message: <% task(configure_manageable).result %>
        status: FAILED
      on-success: send_message

    set_status_failed_get_manageable_nodes:
      publish:
        message: <% task(get_manageable_nodes).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.configure_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Set the 'profile' capability of one node to the given role and post the
# outcome to the Zaqar queue.
#
# Inputs:
#   node_uuid  - node to tag
#   role       - value written to the 'profile' capability (default null)
#   queue_name - Zaqar queue that receives the result message
tag_node:
  description: Tag a node with a role

  tags:
    - tripleo-common-managed

  input:
    - node_uuid
    - role: null
    - queue_name: tripleo

  # A failing task goes straight to send_message; since no status was
  # published in that case, the payload falls back to FAILED.
  task-defaults:
    on-error: send_message

  tasks:

    update_node:
      action: tripleo.baremetal.update_node_capability node_uuid=<% $.node_uuid %> capability='profile' value=<% $.role %>
      publish:
        status: SUCCESS
        message: <% task().result %>
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.tag_node
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Tag one list of nodes with a role, untag another list (tag_node without a
# role), then update the role parameters of the plan.
#
# Inputs:
#   tag_node_uuids   - nodes that receive the role
#   untag_node_uuids - nodes processed by tag_node without a role
#   role             - role to assign
#   plan             - container passed to tripleo.parameters.update_role
#   queue_name       - Zaqar queue that receives the result message
tag_nodes:
  description: Runs the tag_node workflow in a loop

  tags:
    - tripleo-common-managed

  input:
    - tag_node_uuids
    - untag_node_uuids
    - role
    - plan: overcloud
    - queue_name: tripleo

  # A failing task goes straight to send_message; since no status was
  # published in that case, the payload falls back to FAILED.
  task-defaults:
    on-error: send_message

  tasks:

    # Nodes are processed one at a time (concurrency 1).
    tag_nodes:
      workflow: tripleo.baremetal.v1.tag_node
      with-items: node_uuid in <% $.tag_node_uuids %>
      concurrency: 1
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
        role: <% $.role %>
      on-success: untag_nodes

    # No role is passed here, so tag_node uses its default (null).
    untag_nodes:
      workflow: tripleo.baremetal.v1.tag_node
      with-items: node_uuid in <% $.untag_node_uuids %>
      concurrency: 1
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
      on-success: update_role_parameters

    update_role_parameters:
      action: tripleo.parameters.update_role
      input:
        role: <% $.role %>
        container: <% $.plan %>
      publish:
        status: SUCCESS
        message: <% task().result %>
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.tag_nodes
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Collect the UUIDs of available nodes whose profile equals the requested
# one and post them to the Zaqar queue.
#
# Inputs:
#   profile    - profile name to match
#   queue_name - Zaqar queue that receives the result message
nodes_with_profile:
  description: Find nodes with a specific profile

  tags:
    - tripleo-common-managed

  input:
    - profile
    - queue_name: tripleo

  tasks:
    get_available_nodes:
      action: ironic.node_list maintenance=false provision_state='available' detail=true
      on-error: set_status_failed_get_available_nodes
      on-success: get_matching_nodes

    # get_profile runs once per available node; the published list keeps
    # the UUIDs of results whose profile matches the input.
    get_matching_nodes:
      action: tripleo.baremetal.get_profile
      with-items: node in <% task(get_available_nodes).result %>
      input:
        node: <% $.node %>
      publish:
        matching_nodes: <% let(input_profile_name => $.profile) -> task().result.where($.profile = $input_profile_name).uuid %>
      on-error: set_status_failed_get_matching_nodes
      on-success: send_message

    set_status_failed_get_available_nodes:
      publish:
        message: <% task(get_available_nodes).result %>
        status: FAILED
      on-success: send_message

    set_status_failed_get_matching_nodes:
      publish:
        message: <% task(get_matching_nodes).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.nodes_with_profile
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              matching_nodes: <% $.matching_nodes or [] %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Store the target RAID configuration on every listed node, then apply it
# through manual cleaning (delete the old configuration, create the new one).
#
# Inputs:
#   node_uuids    - nodes to configure
#   configuration - passed as target_raid_config to Ironic
#   queue_name    - Zaqar queue that receives the result message
create_raid_configuration:
  description: Create and apply RAID configuration for given nodes
  input:
    - node_uuids
    - configuration
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    set_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      action: ironic.node_set_target_raid_config node_ident=<% $.node_uuid %> target_raid_config=<% $.configuration %>
      on-success: apply_configuration
      on-error: set_configuration_failed

    set_configuration_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(set_configuration).result %>

    apply_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        # Forward the caller's queue so the cleaning messages do not fall
        # back to manual_cleaning's default queue.
        queue_name: <% $.queue_name %>
        clean_steps:
          - interface: raid
            step: delete_configuration
          - interface: raid
            step: create_configuration
        timeout: 1800  # building RAID should be faster than general cleaning
        retry_count: 180
        retry_delay: 10
      on-success: send_message
      on-error: apply_configuration_failed
      publish:
        message: <% task().result %>
        status: SUCCESS

    apply_configuration_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(apply_configuration).result %>

    # Status falls back to FAILED when no task published one.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.create_raid_configuration
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
|
|
# Run cell_v2 host discovery and then look up a hypervisor for every listed
# node. The outcome is posted to the Zaqar queue.
#
# Inputs:
#   node_uuids - nodes expected to show up as hypervisors
#   queue_name - Zaqar queue that receives the result message
cellv2_discovery:
  description: Run cell_v2 host discovery

  tags:
    - tripleo-common-managed

  input:
    - node_uuids
    - queue_name: tripleo

  tasks:

    cell_v2_discover_hosts:
      action: tripleo.baremetal.cell_v2_discover_hosts
      on-error: cell_v2_discover_hosts_failed
      on-success: wait_for_nova_resources

    cell_v2_discover_hosts_failed:
      publish:
        message: <% task(cell_v2_discover_hosts).result %>
        status: FAILED
      on-success: send_message

    # One hypervisor lookup per node, keyed by the node UUID.
    wait_for_nova_resources:
      action: nova.hypervisors_find
      with-items: node_uuid in <% $.node_uuids %>
      input:
        hypervisor_hostname: <% $.node_uuid %>
      on-error: wait_for_nova_resources_failed
      on-success: send_message

    wait_for_nova_resources_failed:
      publish:
        message: <% task(wait_for_nova_resources).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.cellv2_discovery
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
|
|
# Build a list of candidate nodes from the given addresses, credentials and
# ports, probe each candidate and post the resulting nodes JSON to the queue.
#
# Inputs:
#   ip_addresses - addresses to scan
#   credentials  - credentials to try
#   ports        - ports to try (default [623])
#   queue_name   - Zaqar queue that receives the result message
discover_nodes:
  description: Run nodes discovery over the given IP range

  tags:
    - tripleo-common-managed

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo

  tasks:

    # Fetch the listed fields of the existing nodes (limit 0).
    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        limit: 0
      publish:
        existing_nodes: <% task().result %>
      on-error: get_all_nodes_failed
      on-success: get_candidate_nodes

    get_all_nodes_failed:
      publish:
        message: <% task(get_all_nodes).result %>
        status: FAILED
      on-success: send_message

    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      publish:
        candidates: <% task().result %>
      on-error: get_candidate_nodes_failed
      on-success: probe_nodes

    get_candidate_nodes_failed:
      publish:
        message: <% task(get_candidate_nodes).result %>
        status: FAILED
      on-success: send_message

    # One probe per candidate; null probe results are dropped.
    probe_nodes:
      action: tripleo.baremetal.probe_node
      with-items:
        - node in <% $.candidates %>
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      publish:
        nodes_json: <% task().result.where($ != null) %>
      on-error: probe_nodes_failed
      on-success: send_message

    probe_nodes_failed:
      publish:
        message: <% task(probe_nodes).result %>
        status: FAILED
      on-success: send_message

    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              nodes_json: <% $.get('nodes_json', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Run the discover_nodes workflow and feed its nodes JSON into
# register_or_update. The outcome is posted to the Zaqar queue.
#
# Inputs:
#   ip_addresses, credentials, ports - forwarded to discover_nodes
#   kernel_name, ramdisk_name, instance_boot_option, initial_state
#                                    - forwarded to register_or_update
#   queue_name                       - Zaqar queue used by this workflow and
#                                      both sub-workflows
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed
      publish:
        nodes_json: <% task().result.nodes_json %>

    discover_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>

    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
        # Forward the caller's queue, as discover_nodes above does, so the
        # registration messages do not fall back to the default queue.
        queue_name: <% $.queue_name %>
      on-success: send_message
      on-error: enroll_nodes_failed
      publish:
        registered_nodes: <% task().result.registered_nodes %>

    enroll_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>

    # Post the result including the list of registered nodes.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_and_enroll_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.get('registered_nodes', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|