d97cd4a005
Currently these workflows succeed in any case, since we don't have any condition to fail. This change makes them fail if the resulting state does not match the expected one. It also handles the case when a node goes into one of the failure states, so that we don't wait until timeout. Proper error message is returned to avoid confusing operators. Finally, it reduces the traffic between mistral and ironic by only requesting the required fields. Partial-Bug: #1755754 Closes-Bug: #1667776 Change-Id: Ice19306d4c4a2080b0337bc02a6ccee4a81411b5
1318 lines
41 KiB
YAML
1318 lines
41 KiB
YAML
---
|
|
version: '2.0'
|
|
name: tripleo.baremetal.v1
|
|
description: TripleO Baremetal Workflows
|
|
|
|
workflows:
|
|
|
|
set_node_state:
  # Request a provision state change for a single node and poll the node
  # until its provision state is either the target state or one of the
  # listed error states. The workflow fails unless the target is reached.
  input:
    - node_uuid
    - state_action
    - target_state
    # The default includes all failure states, even unused by TripleO.
    - error_states:
        - "error"
        - "adopt failed"
        - "clean failed"
        - "deploy failed"
        - "inspect failed"
        - "rescue failed"

  tags:
    - tripleo-common-managed

  # Returned only on failure; "message" is published by the failure tasks.
  output-on-error:
    result: <% $.message %>

  tasks:

    # Ask Ironic to start the requested state transition.
    set_provision_state:
      action: ironic.node_set_provision_state
      input:
        node_uuid: <% $.node_uuid %>
        state: <% $.state_action %>
      on-success: wait_for_provision_state
      on-error: set_provision_state_failed

    # Keep the Ironic error as the workflow result, then fail.
    set_provision_state_failed:
      publish:
        message: <% task(set_provision_state).result %>
      on-complete: fail

    # Poll the node, requesting only the two fields that are inspected.
    wait_for_provision_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
        fields:
          - provision_state
          - last_error
      # 20 minutes
      timeout: 1200
      retry:
        count: 400
        delay: 3
        # Poll again while the state is neither the target nor a failure.
        continue-on: <% not task().result.provision_state in [$.target_state] + $.error_states %>
      on-complete:
        - state_not_reached: <% task().result.provision_state != $.target_state %>

    # Polling stopped on something other than the target state.
    state_not_reached:
      publish:
        message: >-
          Node <% $.node_uuid %> did not reach state "<% $.target_state %>",
          the state is "<% task(wait_for_provision_state).result.provision_state %>",
          error: <% task(wait_for_provision_state).result.last_error %>
      on-complete: fail
|
|
|
|
set_power_state:
  # Request a power state change for a single node and poll the node until
  # its power state is either the target state or the error state. The
  # workflow fails unless the target is reached.
  input:
    - node_uuid
    - state_action
    - target_state
    - error_state: "error"

  tags:
    - tripleo-common-managed

  # Returned only on failure; "message" is published by the failure tasks.
  output-on-error:
    result: <% $.message %>

  tasks:

    # Ask Ironic to start the requested power action.
    set_power_state:
      action: ironic.node_set_power_state
      input:
        node_id: <% $.node_uuid %>
        state: <% $.state_action %>
      on-success: wait_for_power_state
      on-error: set_power_state_failed

    # Keep the Ironic error as the workflow result, then fail.
    set_power_state_failed:
      publish:
        message: <% task(set_power_state).result %>
      on-complete: fail

    # Poll the node, requesting only the two fields that are inspected.
    wait_for_power_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
        fields:
          - power_state
          - last_error
      # 2 minutes
      timeout: 120
      retry:
        count: 20
        delay: 6
        # Poll again while the state is neither the target nor the error.
        continue-on: <% not task().result.power_state in [$.target_state, $.error_state] %>
      on-complete:
        - state_not_reached: <% task().result.power_state != $.target_state %>

    # Polling stopped on something other than the target power state.
    state_not_reached:
      publish:
        message: >-
          Node <% $.node_uuid %> did not reach power state "<% $.target_state %>",
          the state is "<% task(wait_for_power_state).result.power_state %>",
          error: <% task(wait_for_power_state).result.last_error %>
      on-complete: fail
|
|
|
|
manual_cleaning:
  # Run the given manual clean steps on one node, wait for the node to come
  # back to "manageable", and report the outcome on the Zaqar queue.
  input:
    - node_uuid
    - clean_steps
    - timeout: 7200  # 2 hours (cleaning can take really long)
    - retry_delay: 10
    - retry_count: 720
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Ask Ironic to start cleaning with the supplied steps.
    set_provision_state:
      action: ironic.node_set_provision_state
      input:
        node_uuid: <% $.node_uuid %>
        state: 'clean'
        cleansteps: <% $.clean_steps %>
      on-success: wait_for_provision_state
      on-error: set_provision_state_failed

    set_provision_state_failed:
      publish:
        status: FAILED
        message: <% task(set_provision_state).result %>
      on-success: send_message

    # Poll the node until cleaning finishes. "clean failed" also ends the
    # polling so that a failed cleaning is reported immediately instead of
    # being polled until the timeout expires.
    wait_for_provision_state:
      action: ironic.node_get
      input:
        node_id: <% $.node_uuid %>
        fields: ['provision_state', 'last_error']
      timeout: <% $.timeout %>
      retry:
        delay: <% $.retry_delay %>
        count: <% $.retry_count %>
        continue-on: <% not task().result.provision_state in ['manageable', 'clean failed'] %>
      on-success:
        - send_message: <% task().result.provision_state = 'manageable' %>
        - cleaning_failed: <% task().result.provision_state != 'manageable' %>
      # Timeouts, exhausted retries and API errors are reported as well.
      on-error: wait_for_provision_state_failed

    # Polling stopped on a state other than "manageable".
    cleaning_failed:
      publish:
        status: FAILED
        message: >-
          Cleaning of node <% $.node_uuid %> failed,
          the state is "<% task(wait_for_provision_state).result.provision_state %>",
          error: <% task(wait_for_provision_state).result.last_error %>
      on-success: send_message

    wait_for_provision_state_failed:
      publish:
        status: FAILED
        message: <% task(wait_for_provision_state).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.manual_cleaning
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
validate_nodes:
  description: Validate nodes JSON

  input:
    - nodes_json
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Run the validation action on the supplied nodes definition.
    validate_nodes:
      action: tripleo.baremetal.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
      on-success: send_message
      on-error: validation_failed

    # Record the validation error so that it is included in the message.
    validation_failed:
      publish:
        status: FAILED
        message: <% task(validate_nodes).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.validate_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
register_or_update:
  description: Take nodes JSON and create nodes in a "manageable" state

  input:
    - nodes_json
    - remove: false
    - queue_name: tripleo
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable

  tags:
    - tripleo-common-managed

  tasks:

    # Validate the nodes definition through the validate_nodes workflow.
    validate_input:
      workflow: tripleo.baremetal.v1.validate_nodes
      input:
        nodes_json: <% $.nodes_json %>
        queue_name: <% $.queue_name %>
      on-success: register_or_update_nodes
      on-error: validation_failed

    validation_failed:
      publish:
        status: FAILED
        message: <% task(validate_input).result %>
        registered_nodes: []
      on-success: send_message

    # Register or update the nodes. Nodes whose provision state is "enroll"
    # are remembered as new_nodes for the state transitions below.
    register_or_update_nodes:
      action: tripleo.baremetal.register_or_update_nodes
      input:
        nodes_json: <% $.nodes_json %>
        remove: <% $.remove %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
      publish:
        registered_nodes: <% task().result %>
        new_nodes: <% task().result.where($.provision_state = 'enroll') %>
      on-success:
        # Nothing more to do when the nodes are meant to stay in "enroll".
        - set_nodes_managed: <% $.initial_state != "enroll" %>
        - send_message: <% $.initial_state = "enroll" %>
      on-error: set_status_failed_register_or_update_nodes

    set_status_failed_register_or_update_nodes:
      publish:
        status: FAILED
        message: <% task(register_or_update_nodes).result %>
        registered_nodes: []
      on-success: send_message

    # Move the new nodes to "manageable" through the manage workflow.
    set_nodes_managed:
      workflow: tripleo.baremetal.v1.manage
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: <% $.new_nodes.len() %> node(s) successfully moved to the "manageable" state.
      on-success:
        # Continue to "available" only when that is the requested state.
        - set_nodes_available: <% $.initial_state = "available" %>
        - send_message: <% $.initial_state != "available" %>
      on-error: set_status_failed_nodes_managed

    set_status_failed_nodes_managed:
      publish:
        status: FAILED
        message: <% task(set_nodes_managed).result %>
      on-success: send_message

    # Move the new nodes to "available" through the provide workflow.
    set_nodes_available:
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.new_nodes.uuid %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS
        message: <% $.new_nodes.len() %> node(s) successfully moved to the "available" state.
      on-success: send_message
      on-error: set_status_failed_nodes_available

    set_status_failed_nodes_available:
      publish:
        status: FAILED
        message: <% task(set_nodes_available).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.register_or_update
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.registered_nodes or [] %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
provide:
  description: Take a list of nodes and move them to "available"

  input:
    - node_uuids
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Run the "provide" action on every node and wait for "available".
    # Only inputs declared by set_node_state are passed: that workflow has
    # no queue_name input, so queue_name is not forwarded to it.
    set_nodes_available:
      on-success: cell_v2_discover_hosts
      on-error: set_status_failed_nodes_available
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_node_state
      input:
        node_uuid: <% $.uuid %>
        state_action: 'provide'
        target_state: 'available'

    set_status_failed_nodes_available:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(set_nodes_available).result %>

    # Run the cellv2_discovery workflow for the nodes, retrying it up to
    # 30 times, 30 seconds apart, within the overall timeout.
    cell_v2_discover_hosts:
      on-success: try_power_off
      on-error: cell_v2_discover_hosts_failed
      workflow: tripleo.baremetal.v1.cellv2_discovery
      input:
        node_uuids: <% $.node_uuids %>
        queue_name: <% $.queue_name %>
      timeout: 900  # 15 minutes
      retry:
        delay: 30
        count: 30

    cell_v2_discover_hosts_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(cell_v2_discover_hosts).result %>

    # Power every node off. As above, set_power_state declares no
    # queue_name input, so queue_name is not forwarded to it.
    try_power_off:
      on-success: send_message
      on-error: power_off_failed
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_power_state
      input:
        node_uuid: <% $.uuid %>
        # Must stay quoted: a bare off is a boolean for YAML 1.1 parsers.
        state_action: 'off'
        target_state: 'power off'
      publish:
        status: SUCCESS
        message: <% $.node_uuids.len() %> node(s) successfully moved to the "available" state.

    power_off_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(try_power_off).result %>

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.provide
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
provide_manageable_nodes:
  description: Provide all nodes in a 'manageable' state.

  input:
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # List the nodes and keep the UUIDs of those in the "manageable" state.
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      on-success: provide_manageable
      on-error: set_status_failed_get_manageable_nodes
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>

    set_status_failed_get_manageable_nodes:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>

    # Hand the selected nodes to the provide workflow. A failure of that
    # workflow is routed to a failure task so that it is reported on the
    # queue like every other failure of this workflow.
    provide_manageable:
      on-success: send_message
      on-error: set_status_failed_provide_manageable
      workflow: tripleo.baremetal.v1.provide
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
      publish:
        status: SUCCESS

    set_status_failed_provide_manageable:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(provide_manageable).result %>

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.provide_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
manage:
  description: Set a list of nodes to 'manageable' state

  input:
    - node_uuids
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Run the "manage" action on every node and wait for "manageable".
    set_nodes_manageable:
      with-items: uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.set_node_state
      input:
        node_uuid: <% $.uuid %>
        state_action: "manage"
        target_state: "manageable"
        # Replaces the default list of failure states of set_node_state.
        error_states:
          # node going back to enroll designates power credentials failure
          - "enroll"
          - "error"
      on-success: send_message
      on-error: set_status_failed_nodes_manageable

    set_status_failed_nodes_manageable:
      publish:
        status: FAILED
        message: <% task(set_nodes_manageable).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.manage
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
_introspect:
  description: >
    An internal workflow. The tripleo.baremetal.v1.introspect workflow
    should be used for introspection.

  input:
    - node_uuid
    - timeout
    - queue_name

  tags:
    - tripleo-common-managed

  output:
    result: <% task(start_introspection).result %>

  tasks:

    # Start introspection of the node.
    start_introspection:
      action: baremetal_introspection.introspect
      input:
        uuid: <% $.node_uuid %>
      on-success: wait_for_introspection_to_finish
      on-error: set_status_failed_start_introspection

    set_status_failed_start_introspection:
      publish:
        status: FAILED
        message: <% task(start_introspection).result %>
        introspected_nodes: []
      on-success: send_message

    # Wait for the introspection of this one node to finish.
    wait_for_introspection_to_finish:
      action: baremetal_introspection.wait_for_finish
      input:
        uuids: <% [$.node_uuid] %>
        # The interval is 10 seconds, so divide to make the overall timeout
        # in seconds correct.
        max_retries: <% $.timeout / 10 %>
        retry_interval: 10
      publish:
        # Only one UUID is passed in, so the first value is this node.
        introspected_node: <% task().result.values().first() %>
        # FAILED when the node reports an error, SUCCESS otherwise.
        status: <% bool(task().result.values().first().error) and "FAILED" or "SUCCESS" %>
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      on-success: wait_for_introspection_to_finish_success
      on-error: wait_for_introspection_to_finish_error

    # Build the message for a finished introspection.
    wait_for_introspection_to_finish_success:
      publish:
        message: <% "Introspection of node {0} completed. Status:{1}. Errors:{2}".format($.introspected_node.uuid, $.status, $.introspected_node.error) %>
      on-success: send_message

    # Replace the message with a timeout notice for this node.
    wait_for_introspection_to_finish_error:
      publish:
        message: <% "Introspection of node {0} timed out.".format($.node_uuid) %>
      on-success: send_message

    # Report the per-node outcome on the queue.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1._introspect
            payload:
              status: <% $.status %>
              message: <% $.message %>
              introspected_node: <% $.get('introspected_node') %>
              node_uuid: <% $.node_uuid %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
introspect:
  description: >
    Take a list of nodes and move them through introspection.

    By default each node will attempt introspection up to 3 times (two
    retries plus the initial attemp) if it fails. This behaviour can be
    modified by changing the max_retry_attempts input.

    The workflow will assume the node has timed out after 20 minutes (1200
    seconds). This can be changed by passing the node_timeout input in
    seconds.

  input:
    - node_uuids
    - run_validations: False
    - queue_name: tripleo
    - concurrency: 20
    - max_retry_attempts: 2
    - node_timeout: 1200

  tags:
    - tripleo-common-managed

  # Any task without its own on-error ends up in unhandled_error.
  task-defaults:
    on-error: unhandled_error

  tasks:

    # Start the attempt counter and decide whether to validate first.
    initialize:
      publish:
        introspection_attempt: 1
      on-complete:
        - run_validations: <% $.run_validations %>
        - introspect_nodes: <% not $.run_validations %>

    run_validations:
      workflow: tripleo.validations.v1.run_groups
      input:
        group_names:
          - 'pre-introspection'
        queue_name: <% $.queue_name %>
      on-success: introspect_nodes
      on-error: set_validations_failed

    set_validations_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(run_validations).result %>

    # Introspect every node through the internal _introspect workflow.
    introspect_nodes:
      with-items: uuid in <% $.node_uuids %>
      concurrency: <% $.concurrency %>
      workflow: _introspect
      input:
        node_uuid: <% $.uuid %>
        queue_name: <% $.queue_name %>
        timeout: <% $.node_timeout %>
      # on-error is triggered if one or more nodes failed introspection. We
      # still go to get_introspection_status as it will collect the result
      # for each node. Unless we hit the retry limit.
      on-error:
        - get_introspection_status: <% $.introspection_attempt <= $.max_retry_attempts %>
        - max_retry_attempts_reached: <% $.introspection_attempt > $.max_retry_attempts %>
      on-success: get_introspection_status

    get_introspection_status:
      with-items: uuid in <% $.node_uuids %>
      action: baremetal_introspection.get_status
      input:
        uuid: <% $.uuid %>
      publish:
        introspected_nodes: <% task().result.toDict($.uuid, $) %>
        # Currently there is no way for us to ignore user introspection
        # aborts. This means we will retry aborted nodes until the Ironic API
        # gives us more details (error code or a boolean to show aborts etc.)
        # If a node hasn't finished, we consider it to be failed.
        # TODO(d0ugal): When possible, don't retry introspection of nodes
        #               that a user manually aborted.
        failed_introspection: <% task().result.where($.finished = true and $.error != null).select($.uuid) + task().result.where($.finished = false).select($.uuid) %>
      publish-on-error:
        # If a node fails to start introspection, getting the status can fail.
        # When that happens, the result is a string and the nodes need to be
        # filtered out.
        introspected_nodes: <% task().result.where(isDict($)).toDict($.uuid, $) %>
        # If there was an error, the exception string we get doesn't give us
        # the UUID. So we use a set difference to find the UUIDs missing in
        # the results. These are then added to the failed nodes.
        # Every term is reduced to UUIDs with select($.uuid): the value is
        # reused as node_uuids by retry_failed_nodes, so it must not contain
        # whole status dictionaries.
        failed_introspection: <% ($.node_uuids.toSet() - task().result.where(isDict($)).select($.uuid).toSet()) + task().result.where(isDict($)).where($.finished = true and $.error != null).select($.uuid).toSet() + task().result.where(isDict($)).where($.finished = false).select($.uuid).toSet() %>
      on-error: increase_attempt_counter
      on-success:
        - successful_introspection: <% $.failed_introspection.len() = 0 %>
        - increase_attempt_counter: <% $.failed_introspection.len() > 0 %>

    increase_attempt_counter:
      publish:
        introspection_attempt: <% $.introspection_attempt + 1 %>
      on-complete: retry_failed_nodes

    # Report progress and introspect again, this time only the failed nodes.
    retry_failed_nodes:
      publish:
        status: RUNNING
        message: <% 'Retrying {0} nodes that failed introspection. Attempt {1} of {2} '.format($.failed_introspection.len(), $.introspection_attempt, $.max_retry_attempts + 1) %>
        # We are about to retry, update the tracking stats.
        node_uuids: <% $.failed_introspection %>
      on-success:
        - send_message
        - introspect_nodes

    max_retry_attempts_reached:
      publish:
        status: FAILED
        message: <% 'Retry limit reached with {0} nodes still failing introspection'.format($.failed_introspection.len()) %>
      on-complete: send_message

    successful_introspection:
      publish:
        status: SUCCESS
        message: Successfully introspected <% $.introspected_nodes.len() %> node(s).
      on-complete: send_message

    unhandled_error:
      publish:
        status: FAILED
        message: "Unhandled workflow error"
      on-complete: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.introspect
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              introspected_nodes: <% $.get('introspected_nodes', []) %>
              failed_introspection: <% $.get('failed_introspection', []) %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
introspect_manageable_nodes:
  description: Introspect all nodes in a 'manageable' state.

  input:
    - run_validations: false
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # List the nodes and keep the UUIDs of those in the "manageable" state.
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: validate_nodes
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    # Branch-only task: continue only when at least one node was selected.
    validate_nodes:
      on-success:
        - introspect_manageable: <% $.managed_nodes.len() > 0 %>
        - set_status_failed_no_nodes: <% $.managed_nodes.len() = 0 %>

    set_status_failed_no_nodes:
      publish:
        status: FAILED
        message: No manageable nodes to introspect. Check node states and maintenance.
      on-success: send_message

    # Hand the selected nodes to the introspect workflow.
    introspect_manageable:
      workflow: tripleo.baremetal.v1.introspect
      input:
        node_uuids: <% $.managed_nodes %>
        run_validations: <% $.run_validations %>
        queue_name: <% $.queue_name %>
      publish:
        introspected_nodes: <% task().result.introspected_nodes %>
      on-success: send_message
      on-error: set_status_introspect_manageable

    set_status_introspect_manageable:
      publish:
        status: FAILED
        message: <% task(introspect_manageable).result %>
        introspected_nodes: []
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.introspect_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              introspected_nodes: <% $.get('introspected_nodes', []) %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
configure:
  description: Take a list of manageable nodes and update their boot configuration.

  input:
    - node_uuids
    - queue_name: tripleo
    - kernel_name: "bm-deploy-kernel"
    - ramdisk_name: "bm-deploy-ramdisk"
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: false

  tags:
    - tripleo-common-managed

  tasks:

    # Apply the kernel, ramdisk and boot option settings to every node.
    configure_boot:
      with-items: node_uuid in <% $.node_uuids %>
      action: tripleo.baremetal.configure_boot
      input:
        node_uuid: <% $.node_uuid %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
      on-success: configure_root_device
      on-error: set_status_failed_configure_boot

    # Apply the root device settings to every node.
    configure_root_device:
      with-items: node_uuid in <% $.node_uuids %>
      action: tripleo.baremetal.configure_root_device
      input:
        node_uuid: <% $.node_uuid %>
        root_device: <% $.root_device %>
        minimum_size: <% $.root_device_minimum_size %>
        overwrite: <% $.overwrite_root_device_hints %>
      publish:
        status: SUCCESS
        message: "Successfully configured the nodes."
      on-success: send_message
      on-error: set_status_failed_configure_root_device

    set_status_failed_configure_boot:
      publish:
        status: FAILED
        message: <% task(configure_boot).result %>
      on-success: send_message

    set_status_failed_configure_root_device:
      publish:
        status: FAILED
        message: <% task(configure_root_device).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.configure
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
configure_manageable_nodes:
  description: Update the boot configuration of all nodes in 'manageable' state.

  input:
    - queue_name: tripleo
    - kernel_name: "bm-deploy-kernel"
    - ramdisk_name: "bm-deploy-ramdisk"
    - instance_boot_option: null
    - root_device: null
    - root_device_minimum_size: 4
    - overwrite_root_device_hints: false

  tags:
    - tripleo-common-managed

  tasks:

    # List the nodes and keep the UUIDs of those in the "manageable" state.
    get_manageable_nodes:
      action: ironic.node_list maintenance=False associated=False
      publish:
        managed_nodes: <% task().result.where($.provision_state = 'manageable').uuid %>
      on-success: configure_manageable
      on-error: set_status_failed_get_manageable_nodes

    set_status_failed_get_manageable_nodes:
      publish:
        status: FAILED
        message: <% task(get_manageable_nodes).result %>
      on-success: send_message

    # Hand the selected nodes and all settings to the configure workflow.
    configure_manageable:
      workflow: tripleo.baremetal.v1.configure
      input:
        node_uuids: <% $.managed_nodes %>
        queue_name: <% $.queue_name %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        root_device: <% $.root_device %>
        root_device_minimum_size: <% $.root_device_minimum_size %>
        overwrite_root_device_hints: <% $.overwrite_root_device_hints %>
      publish:
        message: "Manageable nodes configured successfully."
      on-success: send_message
      on-error: set_status_failed_configure_manageable

    set_status_failed_configure_manageable:
      publish:
        status: FAILED
        message: <% task(configure_manageable).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.configure_manageable_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
tag_node:
  description: Tag a node with a role
  input:
    - node_uuid
    - role: null
    - queue_name: tripleo

  # A failing task goes straight to send_message, which reports FAILED
  # unless a task published another status.
  task-defaults:
    on-error: send_message

  tags:
    - tripleo-common-managed

  tasks:

    # Set the "profile" capability of the node to the given role.
    update_node:
      on-success: send_message
      action: tripleo.baremetal.update_node_capability
      input:
        node_uuid: <% $.node_uuid %>
        capability: 'profile'
        value: <% $.role %>
      publish:
        message: <% task().result %>
        status: SUCCESS
      # Keep the action error so that the FAILED message is not empty.
      publish-on-error:
        status: FAILED
        message: <% task().result %>

    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.tag_node
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
tag_nodes:
  description: Runs the tag_node workflow in a loop
  input:
    - tag_node_uuids
    - untag_node_uuids
    - role
    - plan: overcloud
    - queue_name: tripleo

  # A failing task goes straight to send_message, which reports FAILED
  # unless a task published another status.
  task-defaults:
    on-error: send_message

  tags:
    - tripleo-common-managed

  tasks:

    # Tag the listed nodes with the role, one node at a time.
    tag_nodes:
      with-items: node_uuid in <% $.tag_node_uuids %>
      workflow: tripleo.baremetal.v1.tag_node
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
        role: <% $.role %>
      concurrency: 1
      # Keep the error so that the FAILED message is not empty.
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      on-success: untag_nodes

    # No role is passed here, so tag_node falls back to its null default.
    untag_nodes:
      with-items: node_uuid in <% $.untag_node_uuids %>
      workflow: tripleo.baremetal.v1.tag_node
      input:
        node_uuid: <% $.node_uuid %>
        queue_name: <% $.queue_name %>
      concurrency: 1
      # Keep the error so that the FAILED message is not empty.
      publish-on-error:
        status: FAILED
        message: <% task().result %>
      on-success: update_role_parameters

    # Update the parameters of the role in the plan container.
    update_role_parameters:
      on-success: send_message
      action: tripleo.parameters.update_role
      input:
        role: <% $.role %>
        container: <% $.plan %>
      publish:
        message: <% task().result %>
        status: SUCCESS
      # Keep the error so that the FAILED message is not empty.
      publish-on-error:
        status: FAILED
        message: <% task().result %>

    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.tag_nodes
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
nodes_with_profile:
  description: Find nodes with a specific profile
  input:
    - profile
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Detailed listing of the "active" nodes that are not in maintenance.
    get_active_nodes:
      action: ironic.node_list maintenance=false provision_state='active' detail=true
      on-success: get_available_nodes
      on-error: set_status_failed_get_active_nodes

    set_status_failed_get_active_nodes:
      publish:
        status: FAILED
        message: <% task(get_active_nodes).result %>
      on-success: send_message

    # Detailed listing of the "available" nodes that are not in maintenance.
    get_available_nodes:
      action: ironic.node_list maintenance=false provision_state='available' detail=true
      on-success: get_matching_nodes
      on-error: set_status_failed_get_available_nodes

    set_status_failed_get_available_nodes:
      publish:
        status: FAILED
        message: <% task(get_available_nodes).result %>
      on-success: send_message

    # Get the profile of every listed node and keep the UUIDs of the nodes
    # whose profile equals the requested one.
    get_matching_nodes:
      with-items: node in <% task(get_available_nodes).result + task(get_active_nodes).result %>
      action: tripleo.baremetal.get_profile
      input:
        node: <% $.node %>
      publish:
        matching_nodes: <% let(input_profile_name => $.profile) -> task().result.where($.profile = $input_profile_name).uuid %>
      on-success: send_message
      on-error: set_status_failed_get_matching_nodes

    set_status_failed_get_matching_nodes:
      publish:
        status: FAILED
        message: <% task(get_matching_nodes).result %>
      on-success: send_message

    # Report the outcome; SUCCESS is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.nodes_with_profile
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              matching_nodes: <% $.matching_nodes or [] %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
create_raid_configuration:
  description: Create and apply RAID configuration for given nodes
  input:
    - node_uuids
    - configuration
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Store the requested RAID configuration on every node.
    set_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      action: ironic.node_set_target_raid_config
      input:
        node_ident: <% $.node_uuid %>
        target_raid_config: <% $.configuration %>
      on-success: apply_configuration
      on-error: set_configuration_failed

    set_configuration_failed:
      publish:
        status: FAILED
        message: <% task(set_configuration).result %>
      on-success: send_message

    # Run manual cleaning on every node with two RAID steps: delete the
    # existing configuration, then create the new one.
    apply_configuration:
      with-items: node_uuid in <% $.node_uuids %>
      workflow: tripleo.baremetal.v1.manual_cleaning
      input:
        node_uuid: <% $.node_uuid %>
        clean_steps:
          - interface: raid
            step: delete_configuration
          - interface: raid
            step: create_configuration
        # Building RAID should be faster than general cleaning.
        timeout: 1800
        retry_count: 180
        retry_delay: 10
      publish:
        message: <% task().result %>
        status: SUCCESS
      on-success: send_message
      on-error: apply_configuration_failed

    apply_configuration_failed:
      publish:
        status: FAILED
        message: <% task(apply_configuration).result %>
      on-success: send_message

    # Report the outcome; FAILED is assumed when no task published a status.
    send_message:
      action: zaqar.queue_post
      retry:
        count: 5
        delay: 1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.create_raid_configuration
            payload:
              status: <% $.get('status', 'FAILED') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        # Make the workflow itself fail once the failure has been reported.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
|
|
# Runs cell_v2 host discovery, then checks that nova has a hypervisor entry
# for every node in node_uuids and posts the outcome to the Zaqar queue.
#
# Input:
#   node_uuids - nodes whose hypervisor records are looked up
#   queue_name - Zaqar queue that receives the result (default: tripleo)
cellv2_discovery:
  description: Run cell_v2 host discovery

  input:
    - node_uuids
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    cell_v2_discover_hosts:
      on-success: wait_for_nova_resources
      on-error: cell_v2_discover_hosts_failed
      action: tripleo.baremetal.cell_v2_discover_hosts

    cell_v2_discover_hosts_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(cell_v2_discover_hosts).result %>

    # Look up the hypervisor record of each node by its UUID.
    # A hypervisor record can show up with a delay, so keep polling instead
    # of failing the whole workflow on the first unsuccessful lookup.
    wait_for_nova_resources:
      on-success: send_message
      on-error: wait_for_nova_resources_failed
      with-items: node_uuid in <% $.node_uuids %>
      action: nova.hypervisors_find hypervisor_hostname=<% $.node_uuid %>
      timeout: 900  # 15 minutes
      retry:
        delay: 10
        count: 90

    wait_for_nova_resources_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(wait_for_nova_resources).result %>

    # Report the outcome; fail the workflow afterwards if a FAILED status
    # was published by one of the *_failed tasks.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.cellv2_discovery
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
|
|
# Lists the nodes already known to ironic, derives candidate nodes from the
# given addresses, ports and credentials, probes each candidate and posts
# the resulting nodes_json to the Zaqar queue.
#
# Input:
#   ip_addresses - passed to tripleo.baremetal.get_candidate_nodes
#   credentials  - passed to tripleo.baremetal.get_candidate_nodes
#   ports        - passed to tripleo.baremetal.get_candidate_nodes
#                  (default: [623])
#   queue_name   - Zaqar queue that receives the result (default: tripleo)
discover_nodes:
  description: Run nodes discovery over the given IP range

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Fetch only the fields needed for candidate selection.
    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        limit: 0
      publish:
        existing_nodes: <% task().result %>
      on-success: get_candidate_nodes
      on-error: get_all_nodes_failed

    get_all_nodes_failed:
      publish:
        status: FAILED
        message: <% task(get_all_nodes).result %>
      on-success: send_message

    # Build the list of candidates, handing over the nodes ironic already has.
    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      publish:
        candidates: <% task().result %>
      on-success: probe_nodes
      on-error: get_candidate_nodes_failed

    get_candidate_nodes_failed:
      publish:
        status: FAILED
        message: <% task(get_candidate_nodes).result %>
      on-success: send_message

    # Probe every candidate; null results are dropped from nodes_json.
    probe_nodes:
      action: tripleo.baremetal.probe_node
      with-items:
        - node in <% $.candidates %>
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      publish:
        nodes_json: <% task().result.where($ != null) %>
      on-success: send_message
      on-error: probe_nodes_failed

    probe_nodes_failed:
      publish:
        status: FAILED
        message: <% task(probe_nodes).result %>
      on-success: send_message

    # Report the outcome; status defaults to SUCCESS and nodes_json to an
    # empty list when they were not published.
    send_message:
      action: zaqar.queue_post
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              nodes_json: <% $.get('nodes_json', []) %>
      retry: count=5 delay=1
      on-success:
        # Fail the workflow after reporting if a FAILED status was published.
        - fail: <% $.get('status') = "FAILED" %>
|
|
|
|
# Chains two sub-workflows: discover_nodes to find nodes, then
# register_or_update to enroll them. The registered nodes are posted to the
# Zaqar queue.
#
# Input:
#   ip_addresses, credentials, ports - forwarded to discover_nodes
#   kernel_name, ramdisk_name, instance_boot_option, initial_state
#                                    - forwarded to register_or_update
#   queue_name - Zaqar queue that receives the result (default: tripleo);
#                also forwarded to discover_nodes
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
    - queue_name: tripleo

  tags:
    - tripleo-common-managed

  tasks:

    # Step 1: run discovery and keep the nodes_json it produced.
    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      publish:
        nodes_json: <% task().result.nodes_json %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed

    discover_nodes_failed:
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>
      on-success: send_message

    # Step 2: enroll the discovered nodes.
    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
      publish:
        registered_nodes: <% task().result.registered_nodes %>
      on-success: send_message
      on-error: enroll_nodes_failed

    enroll_nodes_failed:
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>
      on-success: send_message

    # Report the outcome; status defaults to SUCCESS and registered_nodes to
    # an empty list when they were not published.
    send_message:
      action: zaqar.queue_post
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_and_enroll_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.get('registered_nodes', []) %>
      retry: count=5 delay=1
      on-success:
        # Fail the workflow after reporting if a FAILED status was published.
        - fail: <% $.get('status') = "FAILED" %>
|