# self-healing-sig/use-cases/heat-mistral-aodh/lb_server.yaml
#
# 200 lines
# 5.3 KiB
# YAML

# Heat Orchestration Template (HOT) header: template format version and a
# human-readable summary of what the template deploys.
heat_template_version: rocky

description: >-
  A load-balancer server
# Template inputs. Every parameter except root_stack_id is required.
parameters:
  # --- Server definition ---------------------------------------------------
  image:
    type: string
    description: Image used for servers
  key_name:
    type: string
    description: SSH key to connect to the servers
  flavor:
    type: string
    description: flavor used by the servers
  security_group:
    type: string
    description: security_group used by the web servers
  user_data:
    type: string
    description: Server user_data
  metadata:
    type: json
    description: Metadata to associate with the server

  # --- Load balancer -------------------------------------------------------
  pool_id:
    type: string
    description: Pool to contact

  # --- Networking ----------------------------------------------------------
  network:
    type: string
    description: Network used by the server
  subnet:
    type: string
    description: Subnet used by the server
  external_network:
    type: string
    description: UUID or Name of a Neutron external network

  # --- Auto-healing --------------------------------------------------------
  # The empty default marks the template as standalone (see the
  # is_standalone condition); this stack is then updated when a server is
  # healed.
  root_stack_id:
    type: string
    description: >-
      ID of the root stack to update when a server is healed; leave empty to
      update this stack itself
    default: ""
conditions:
  # True when no root stack ID was passed in, i.e. root_stack_id still holds
  # its empty-string default.
  is_standalone:
    equals:
      - get_param: root_stack_id
      - ""
resources:
  # Script-type software configuration. The script body is read from
  # nginx-script.sh; it declares "host" and "version" inputs and a "result"
  # output.
  config:
    type: OS::Heat::SoftwareConfig
    properties:
      group: script
      inputs:
        - name: host
        - name: version
      outputs:
        - name: result
      config:
        get_file: nginx-script.sh

  # Applies the software configuration above to the server.
  deployment:
    type: OS::Heat::SoftwareDeployment
    properties:
      config:
        get_resource: config
      server:
        get_resource: server
      input_values:
        # The server's "first_address" attribute is deprecated in Heat; read
        # the first address on the server's network from "networks" instead.
        host:
          get_attr: [server, networks, {get_param: network}, 0]
        version: "v1.0.0"

  # The load-balanced server itself, attached to a single network.
  server:
    type: OS::Nova::Server
    properties:
      flavor: {get_param: flavor}
      security_groups:
        - {get_param: security_group}
      image: {get_param: image}
      key_name: {get_param: key_name}
      metadata: {get_param: metadata}
      user_data: {get_param: user_data}
      user_data_format: SOFTWARE_CONFIG
      networks:
        - network: {get_param: network}

  # Registers the server in the load-balancer pool on port 80.
  member:
    # Octavia pool member; this template previously used
    # OS::Neutron::LBaaS::PoolMember.
    type: OS::Octavia::PoolMember
    properties:
      pool: {get_param: pool_id}
      # "first_address" is deprecated; use the first address on the server's
      # network from the "networks" attribute.
      address:
        get_attr: [server, networks, {get_param: network}, 0]
      protocol_port: 80
      subnet: {get_param: subnet}

  # Binds the floating IP to the server's first port on its network.
  server_floating_ip_assoc:
    type: OS::Neutron::FloatingIPAssociation
    properties:
      floatingip_id: {get_resource: floating_ip}
      port_id:
        get_attr: [server, addresses, {get_param: network}, 0, port]

  # Floating IP allocated from the external network.
  floating_ip:
    type: OS::Neutron::FloatingIP
    properties:
      floating_network: {get_param: external_network}

  # Queue that receives notifications from the three event alarms below.
  alarm_queue:
    type: OS::Zaqar::Queue

  # Fires on an instance update event for this server with state "stopped".
  stop_event_alarm:
    type: OS::Aodh::EventAlarm
    properties:
      event_type: compute.instance.update
      query:
        - field: traits.instance_id
          value: {get_resource: server}
          op: eq
        - field: traits.state
          value: stopped
          op: eq
      alarm_queues:
        - {get_resource: alarm_queue}

  # Fires on an instance update event for this server with state "error".
  error_event_alarm:
    type: OS::Aodh::EventAlarm
    properties:
      event_type: compute.instance.update
      query:
        - field: traits.instance_id
          value: {get_resource: server}
          op: eq
        - field: traits.state
          value: error
          op: eq
      alarm_queues:
        - {get_resource: alarm_queue}

  # Fires on any instance delete event for this server.
  deleted_event_alarm:
    type: OS::Aodh::EventAlarm
    properties:
      # Quoted so the trailing "*" can never be misread as YAML syntax.
      event_type: 'compute.instance.delete.*'
      query:
        - field: traits.instance_id
          value: {get_resource: server}
          op: eq
      alarm_queues:
        - {get_resource: alarm_queue}

  # The Aodh event alarm does not take effect immediately; it may take up to
  # 60s (by default) for the event_alarm_cache_ttl to expire and the tenant's
  # alarm data to be loaded. This resource ensures the stack is not completed
  # until the alarm is active. See https://bugs.launchpad.net/aodh/+bug/1651273
  alarm_cache_wait:
    type: OS::Heat::TestResource
    properties:
      action_wait_secs:
        create: 60
        update: 60
      # Referencing every alarm makes this resource depend on all of them.
      value:
        list_join:
          - ''
          - - {get_attr: [stop_event_alarm, show]}
            - {get_attr: [error_event_alarm, show]}
            - {get_attr: [deleted_event_alarm, show]}

  # Runs the autoheal workflow for messages arriving on the alarm queue.
  alarm_subscription:
    type: OS::Zaqar::MistralTrigger
    properties:
      queue_name: {get_resource: alarm_queue}
      workflow_id: {get_resource: autoheal}
      input:
        stack_id: {get_param: "OS::stack_id"}
        # Standalone: update this stack. Otherwise: update the root stack
        # whose ID was passed in.
        root_stack_id:
          if:
            - is_standalone
            - {get_param: "OS::stack_id"}
            - {get_param: "root_stack_id"}

  autoheal:
    type: OS::Mistral::Workflow
    properties:
      description: >
        Mark a server as unhealthy and commence a stack update to replace it.
      # Workflow inputs declared without default values (explicit nulls).
      input:
        stack_id: null
        root_stack_id: null
      type: direct
      tasks:
        # Step 1: mark the resource named by the alarm's instance_id trait
        # as unhealthy in the stack identified by stack_id.
        - name: resources_mark_unhealthy
          action:
            list_join:
              - ' '
              - - heat.resources_mark_unhealthy
                - "stack_id=<% $.stack_id %>"
                - "resource_name=<% env().notification.body.reason_data.event.traits.where($[0] = 'instance_id').select($[2]).first() %>"
                - mark_unhealthy=true
                - "resource_status_reason='Marked by alarm'"
          on_success:
            - stacks_update
        # Step 2: update the root stack, reusing its existing template and
        # parameters (existing=true).
        - name: stacks_update
          action: "heat.stacks_update stack_id=<% $.root_stack_id %> existing=true"
# Values exposed to the parent stack or to the user.
outputs:
  server_ip:
    description: IP Address of the load-balanced server.
    # The server's "first_address" attribute is deprecated in Heat; read the
    # first address on the server's network from "networks" instead.
    value:
      get_attr: [server, networks, {get_param: network}, 0]
  lb_member:
    description: LB member details.
    value:
      get_attr: [member, show]