tripleo-common/workbooks/validations.yaml
Jiri Stransky 1930a75a45 Fix broken validations on stack update
Patch I3be30ed8bb61d8ed514991a2f55c5c4e36c56411 fixed a confusing
message on initial deploy (bug #1703942) but broke stack update for
cases when there are no extra free nodes available (even though they
are not needed).

This patch reverts the earlier one and fixes the confusing message in
an alternative way -- we first validate available node count (a
validation for this already exists), and only if it succeeds we
proceed to validate the availability of nodes matching particular
flavors.

Change-Id: I91ddc03e5ac22494f5276f08f87011074483d5c6
Closes-Bug: #1745997
Related-Bug: #1703942
(cherry picked from commit 8ceb4e2b78)
2018-03-05 15:21:10 +00:00

783 lines
24 KiB
YAML

---
version: '2.0'
name: tripleo.validations.v1
description: TripleO Validations Workflows v1
workflows:
run_validation:
input:
- validation_name
- plan: overcloud
- queue_name: tripleo
tags:
- tripleo-common-managed
tasks:
notify_running:
on-complete: run_validation
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.run_validation
payload:
validation_name: <% $.validation_name %>
plan: <% $.plan %>
status: RUNNING
execution: <% execution() %>
run_validation:
on-success: send_message
on-error: set_status_failed
action: tripleo.validations.run_validation validation=<% $.validation_name %> plan=<% $.plan %>
publish:
status: SUCCESS
stdout: <% task().result.stdout %>
stderr: <% task().result.stderr %>
set_status_failed:
on-complete: send_message
publish:
status: FAILED
stdout: <% task(run_validation).result.stdout %>
stderr: <% task(run_validation).result.stderr %>
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.run_validation
payload:
validation_name: <% $.validation_name %>
plan: <% $.plan %>
status: <% $.get('status', 'SUCCESS') %>
stdout: <% $.stdout %>
stderr: <% $.stderr %>
execution: <% execution() %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
run_validations:
input:
- validation_names: []
- plan: overcloud
- queue_name: tripleo
tags:
- tripleo-common-managed
tasks:
notify_running:
on-complete: run_validations
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.run_validations
payload:
validation_names: <% $.validation_names %>
plan: <% $.plan %>
status: RUNNING
execution: <% execution() %>
run_validations:
on-success: send_message
on-error: set_status_failed
workflow: tripleo.validations.v1.run_validation validation_name=<% $.validation %> plan=<% $.plan %> queue_name=<% $.queue_name %>
with-items: validation in <% $.validation_names %>
publish:
status: SUCCESS
set_status_failed:
on-complete: send_message
publish:
status: FAILED
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.run_validations
payload:
validation_names: <% $.validation_names %>
plan: <% $.plan %>
status: <% $.get('status', 'SUCCESS') %>
execution: <% execution() %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
run_groups:
input:
- group_names: []
- plan: overcloud
- queue_name: tripleo
tags:
- tripleo-common-managed
tasks:
find_validations:
on-success: notify_running
action: tripleo.validations.list_validations groups=<% $.group_names %>
publish:
validations: <% task().result %>
notify_running:
on-complete: run_validation_group
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.run_validations
payload:
group_names: <% $.group_names %>
validation_names: <% $.validations.id %>
plan: <% $.plan %>
status: RUNNING
execution: <% execution() %>
run_validation_group:
on-success: send_message
on-error: set_status_failed
workflow: tripleo.validations.v1.run_validation validation_name=<% $.validation %> plan=<% $.plan %> queue_name=<% $.queue_name %>
with-items: validation in <% $.validations.id %>
publish:
status: SUCCESS
set_status_failed:
on-complete: send_message
publish:
status: FAILED
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.run_groups
payload:
group_names: <% $.group_names %>
validation_names: <% $.validations.id %>
plan: <% $.plan %>
status: <% $.get('status', 'SUCCESS') %>
execution: <% execution() %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
list:
input:
- group_names: []
tags:
- tripleo-common-managed
tasks:
find_validations:
action: tripleo.validations.list_validations groups=<% $.group_names %>
list_groups:
tags:
- tripleo-common-managed
tasks:
find_groups:
action: tripleo.validations.list_groups
add_validation_ssh_key_parameter:
input:
- container
- queue_name: tripleo
tags:
- tripleo-common-managed
tasks:
test_validations_enabled:
action: tripleo.validations.enabled
on-success: get_pubkey
on-error: unset_validation_key_parameter
get_pubkey:
action: tripleo.validations.get_pubkey
on-success: set_validation_key_parameter
publish:
pubkey: <% task().result %>
set_validation_key_parameter:
action: tripleo.parameters.update
input:
parameters:
node_admin_extra_ssh_keys: <% $.pubkey %>
container: <% $.container %>
# NOTE(shadower): We need to clear keys from a previous deployment
unset_validation_key_parameter:
action: tripleo.parameters.update
input:
parameters:
node_admin_extra_ssh_keys: ""
container: <% $.container %>
copy_ssh_key:
input:
# FIXME: we should stop using heat-admin as e.g. split-stack
# environments (where Nova didn't create overcloud nodes) don't
# have it present
- overcloud_admin: heat-admin
- queue_name: tripleo
tags:
- tripleo-common-managed
tasks:
get_servers:
action: nova.servers_list
on-success: get_pubkey
publish:
servers: <% task().result._info %>
get_pubkey:
action: tripleo.validations.get_pubkey
on-success: deploy_ssh_key
publish:
pubkey: <% task().result %>
deploy_ssh_key:
workflow: tripleo.deployment.v1.deploy_on_server
with-items: server in <% $.servers %>
input:
server_name: <% $.server.name %>
server_uuid: <% $.server.id %>
config: |
#!/bin/bash
if ! grep "<% $.pubkey %>" /home/<% $.overcloud_admin %>/.ssh/authorized_keys; then
echo "<% $.pubkey %>" >> /home/<% $.overcloud_admin %>/.ssh/authorized_keys
fi
config_name: copy_ssh_key
group: script
queue_name: <% $.queue_name %>
check_boot_images:
input:
- deploy_kernel_name: 'bm-deploy-kernel'
- deploy_ramdisk_name: 'bm-deploy-ramdisk'
- run_validations: true
- queue_name: tripleo
output:
errors: <% $.errors %>
warnings: <% $.warnings %>
kernel_id: <% $.kernel_id %>
ramdisk_id: <% $.ramdisk_id %>
tags:
- tripleo-common-managed
tasks:
check_run_validations:
on-complete:
- get_images: <% $.run_validations %>
- send_message: <% not $.run_validations %>
get_images:
action: glance.images_list
on-success: check_images
publish:
images: <% task().result %>
check_images:
action: tripleo.validations.check_boot_images
input:
images: <% $.images %>
deploy_kernel_name: <% $.deploy_kernel_name %>
deploy_ramdisk_name: <% $.deploy_ramdisk_name %>
on-success: send_message
publish:
kernel_id: <% task().result.kernel_id %>
ramdisk_id: <% task().result.ramdisk_id %>
warnings: <% task().result.warnings %>
errors: <% task().result.errors %>
on-error: send_message
publish-on-error:
kernel_id: <% task().result.kernel_id %>
ramdisk_id: <% task().result.ramdisk_id %>
warnings: <% task().result.warnings %>
errors: <% task().result.errors %>
status: FAILED
message: <% task().result %>
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.check_boot_images
payload:
status: <% $.get('status', 'SUCCESS') %>
message: <% $.get('message', '') %>
execution: <% execution() %>
kernel_id: <% $.kernel_id %>
ramdisk_id: <% $.ramdisk_id %>
errors: <% $.errors %>
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
collect_flavors:
input:
- roles_info: {}
- run_validations: true
- queue_name: tripleo
output:
errors: <% $.errors %>
warnings: <% $.warnings %>
flavors: <% $.flavors %>
tags:
- tripleo-common-managed
tasks:
check_run_validations:
on-complete:
- check_flavors: <% $.run_validations %>
- send_message: <% not $.run_validations %>
check_flavors:
action: tripleo.validations.check_flavors
input:
roles_info: <% $.roles_info %>
on-success: send_message
publish:
flavors: <% task().result.flavors %>
errors: <% task().result.errors %>
warnings: <% task().result.warnings %>
on-error: send_message
publish-on-error:
flavors: {}
errors: <% task().result.errors %>
warnings: <% task().result.warnings %>
status: FAILED
message: <% task().result %>
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.collect_flavors
payload:
status: <% $.get('status', 'SUCCESS') %>
message: <% $.get('message', '') %>
execution: <% execution() %>
flavors: <% $.flavors %>
errors: <% $.errors %>
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
check_ironic_boot_configuration:
input:
- kernel_id: null
- ramdisk_id: null
- run_validations: true
- queue_name: tripleo
output:
errors: <% $.errors %>
warnings: <% $.warnings %>
tags:
- tripleo-common-managed
tasks:
check_run_validations:
on-complete:
- get_ironic_nodes: <% $.run_validations %>
- send_message: <% not $.run_validations %>
get_ironic_nodes:
action: ironic.node_list
input:
provision_state: available
maintenance: false
detail: true
on-success: check_node_boot_configuration
publish:
nodes: <% task().result %>
on-error: send_message
publish-on-error:
status: FAILED
message: <% task().result %>
check_node_boot_configuration:
action: tripleo.validations.check_node_boot_configuration
input:
node: <% $.node %>
kernel_id: <% $.kernel_id %>
ramdisk_id: <% $.ramdisk_id %>
with-items: node in <% $.nodes %>
on-success: send_message
publish:
errors: <% task().result.errors.flatten() %>
warnings: <% task().result.warnings.flatten() %>
on-error: send_message
publish-on-error:
errors: <% task().result.errors.flatten() %>
warnings: <% task().result.warnings.flatten() %>
status: FAILED
message: <% task().result %>
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.check_ironic_boot_configuration
payload:
status: <% $.get('status', 'SUCCESS') %>
message: <% $.get('message', '') %>
execution: <% execution() %>
errors: <% $.errors %>
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
verify_profiles:
input:
- flavors: []
- run_validations: true
- queue_name: tripleo
output:
errors: <% $.errors %>
warnings: <% $.warnings %>
tags:
- tripleo-common-managed
tasks:
check_run_validations:
on-complete:
- get_ironic_nodes: <% $.run_validations %>
- send_message: <% not $.run_validations %>
get_ironic_nodes:
action: ironic.node_list
input:
maintenance: false
detail: true
on-success: verify_profiles
publish:
nodes: <% task().result %>
on-error: send_message
publish-on-error:
status: FAILED
message: <% task().result %>
verify_profiles:
action: tripleo.validations.verify_profiles
input:
nodes: <% $.nodes %>
flavors: <% $.flavors %>
on-success: send_message
publish:
errors: <% task().result.errors %>
warnings: <% task().result.warnings %>
on-error: send_message
publish-on-error:
errors: <% task().result.errors %>
warnings: <% task().result.warnings %>
status: FAILED
message: <% task().result %>
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.verify_profiles
payload:
status: <% $.get('status', 'SUCCESS') %>
message: <% $.get('message', '') %>
execution: <% execution() %>
errors: <% $.errors %>
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
check_default_nodes_count:
input:
- stack_id: overcloud
- parameters: {}
- default_role_counts: {}
- run_validations: true
- queue_name: tripleo
output:
statistics: <% $.statistics %>
errors: <% $.errors %>
warnings: <% $.warnings %>
tags:
- tripleo-common-managed
tasks:
check_run_validations:
on-complete:
- get_hypervisor_statistics: <% $.run_validations %>
- send_message: <% not $.run_validations %>
get_hypervisor_statistics:
action: nova.hypervisors_statistics
on-success: get_stack
publish:
statistics: <% task().result %>
on-error: send_message
publish-on-error:
status: FAILED
message: <% task().result %>
errors: []
warnings: []
statistics: null
get_stack:
action: heat.stacks_get
input:
stack_id: <% $.stack_id %>
on-success: get_associated_nodes
publish:
stack: <% task().result %>
on-error: get_associated_nodes
publish-on-error:
stack: null
get_associated_nodes:
action: ironic.node_list
input:
associated: true
on-success: get_available_nodes
publish:
associated_nodes: <% task().result %>
on-error: send_message
publish-on-error:
status: FAILED
message: <% task().result %>
errors: []
warnings: []
get_available_nodes:
action: ironic.node_list
input:
provision_state: available
associated: false
maintenance: false
on-success: check_nodes_count
publish:
available_nodes: <% task().result %>
on-error: send_message
publish-on-error:
status: FAILED
message: <% task().result %>
errors: []
warnings: []
check_nodes_count:
action: tripleo.validations.check_nodes_count
input:
statistics: <% $.statistics %>
stack: <% $.stack %>
associated_nodes: <% $.associated_nodes %>
available_nodes: <% $.available_nodes %>
parameters: <% $.parameters %>
default_role_counts: <% $.default_role_counts %>
on-success: send_message
publish:
errors: <% task().result.errors %>
warnings: <% task().result.warnings %>
on-error: send_message
publish-on-error:
status: FAILED
message: <% task().result %>
statistics: null
errors: <% task().result.errors %>
warnings: <% task().result.warnings %>
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.check_hypervisor_stats
payload:
status: <% $.get('status', 'SUCCESS') %>
message: <% $.get('message', '') %>
execution: <% execution() %>
statistics: <% $.statistics %>
errors: <% $.errors %>
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
check_pre_deployment_validations:
input:
- deploy_kernel_name: 'bm-deploy-kernel'
- deploy_ramdisk_name: 'bm-deploy-ramdisk'
- roles_info: {}
- stack_id: overcloud
- parameters: {}
- default_role_counts: {}
- run_validations: true
- queue_name: tripleo
output:
errors: <% $.errors %>
warnings: <% $.warnings %>
kernel_id: <% $.kernel_id %>
ramdisk_id: <% $.ramdisk_id %>
flavors: <% $.flavors %>
statistics: <% $.statistics %>
tags:
- tripleo-common-managed
tasks:
init_messages:
on-success: check_boot_images
publish:
errors: []
warnings: []
check_boot_images:
workflow: check_boot_images
input:
deploy_kernel_name: <% $.deploy_kernel_name %>
deploy_ramdisk_name: <% $.deploy_ramdisk_name %>
run_validations: <% $.run_validations %>
queue_name: <% $.queue_name %>
publish:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
kernel_id: <% task().result.get('kernel_id') %>
ramdisk_id: <% task().result.get('ramdisk_id') %>
publish-on-error:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
kernel_id: <% task().result.get('kernel_id') %>
ramdisk_id: <% task().result.get('ramdisk_id') %>
status: FAILED
on-success: collect_flavors
on-error: collect_flavors
collect_flavors:
workflow: collect_flavors
input:
roles_info: <% $.roles_info %>
run_validations: <% $.run_validations %>
queue_name: <% $.queue_name %>
publish:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
flavors: <% task().result.get('flavors') %>
publish-on-error:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
flavors: <% task().result.get('flavors') %>
status: FAILED
on-success: check_ironic_boot_configuration
on-error: check_ironic_boot_configuration
check_ironic_boot_configuration:
workflow: check_ironic_boot_configuration
input:
kernel_id: <% $.kernel_id %>
ramdisk_id: <% $.ramdisk_id %>
run_validations: <% $.run_validations %>
queue_name: <% $.queue_name %>
publish:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
publish-on-error:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
status: FAILED
on-success: check_default_nodes_count
on-error: check_default_nodes_count
check_default_nodes_count:
workflow: check_default_nodes_count
# ironic-nova sync happens once in two minutes
retry: count=12 delay=10
input:
stack_id: <% $.stack_id %>
parameters: <% $.parameters %>
default_role_counts: <% $.default_role_counts %>
run_validations: <% $.run_validations %>
queue_name: <% $.queue_name %>
publish:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
statistics: <% task().result.get('statistics') %>
publish-on-error:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
statistics: <% task().result.get('statistics') %>
status: FAILED
on-success: verify_profiles
# Do not confuse user with info about profiles if the nodes
# count is off in the first place. Skip directly to
# send_message. (bug 1703942)
on-error: send_message
verify_profiles:
workflow: verify_profiles
input:
flavors: <% $.flavors %>
run_validations: <% $.run_validations %>
queue_name: <% $.queue_name %>
publish:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
publish-on-error:
errors: <% $.errors + task().result.get('errors', []) %>
warnings: <% $.warnings + task().result.get('warnings', []) %>
status: FAILED
on-success: send_message
on-error: send_message
send_message:
action: zaqar.queue_post
retry: count=5 delay=1
input:
queue_name: <% $.queue_name %>
messages:
body:
type: tripleo.validations.v1.check_hypervisor_stats
payload:
status: <% $.get('status', 'SUCCESS') %>
message: <% $.get('message', '') %>
execution: <% execution() %>
kernel_id: <% $.kernel_id %>
ramdisk_id: <% $.ramdisk_id %>
flavors: <% $.flavors %>
statistics: <% $.statistics %>
errors: <% $.errors %>
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>