Merge "Fix swarm bay failure reporting"
This commit is contained in:
commit
30d9ce3f81
|
@ -7,22 +7,6 @@ echo "notifying heat"
|
||||||
STATUS="SUCCESS"
|
STATUS="SUCCESS"
|
||||||
REASON="Setup complete"
|
REASON="Setup complete"
|
||||||
DATA="OK"
|
DATA="OK"
|
||||||
FAILED_SERVICE=""
|
|
||||||
|
|
||||||
for service in $NODE_SERVICES; do
|
|
||||||
echo "checking service status for $service"
|
|
||||||
systemctl status $service
|
|
||||||
if [[ $? -ne 0 ]]; then
|
|
||||||
echo "$service is not active, the cluster is not valid"
|
|
||||||
FAILED_SERVICE="$FAILED_SERVICE $service"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [[ -n $FAILED_SERVICE ]]; then
|
|
||||||
STATUS="FAILURE"
|
|
||||||
REASON="Setup failed, $FAILED_SERVICE not start up correctly."
|
|
||||||
DATA="Failed"
|
|
||||||
fi
|
|
||||||
|
|
||||||
data=$(echo '{"Status": "'${STATUS}'", "Reason": "'$REASON'", "Data": "'${DATA}'", "UniqueId": "00000"}')
|
data=$(echo '{"Status": "'${STATUS}'", "Reason": "'$REASON'", "Data": "'${DATA}'", "UniqueId": "00000"}')
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
#cloud-config
|
||||||
|
merge_how: dict(recurse_array)+list(append)
|
||||||
|
write_files:
|
||||||
|
- path: /etc/systemd/system/$SERVICE-failure.service
|
||||||
|
owner: "root:root"
|
||||||
|
permissions: "0644"
|
||||||
|
content: |
|
||||||
|
[Unit]
|
||||||
|
Description=$SERVICE Failure Notifier
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
TimeoutStartSec=0
|
||||||
|
ExecStart=/usr/bin/curl -sf -X PUT -H 'Content-Type: application/json' \
|
||||||
|
--data-binary '{"Status": "FAILURE", "Reason": "$SERVICE service failed to start.", "Data": "OK", "UniqueId": "00000"}' \
|
||||||
|
"$WAIT_HANDLE"
|
|
@ -9,12 +9,13 @@ write_files:
|
||||||
Description=Swarm Agent
|
Description=Swarm Agent
|
||||||
After=docker.service
|
After=docker.service
|
||||||
Requires=docker.service
|
Requires=docker.service
|
||||||
|
OnFailure=swarm-agent-failure.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
TimeoutStartSec=0
|
TimeoutStartSec=0
|
||||||
ExecStartPre=-/usr/bin/docker kill swarm-agent
|
ExecStartPre=-/usr/bin/docker kill swarm-agent
|
||||||
ExecStartPre=-/usr/bin/docker rm swarm-agent
|
ExecStartPre=-/usr/bin/docker rm swarm-agent
|
||||||
ExecStartPre=/usr/bin/docker pull swarm:0.2.0
|
ExecStartPre=-/usr/bin/docker pull swarm:0.2.0
|
||||||
#TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image
|
#TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image
|
||||||
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY -e https_proxy=$HTTPS_PROXY -e no_proxy=$NO_PROXY --name swarm-agent swarm:0.2.0 join --addr $NODE_IP:2375 $DISCOVERY_URL
|
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY -e https_proxy=$HTTPS_PROXY -e no_proxy=$NO_PROXY --name swarm-agent swarm:0.2.0 join --addr $NODE_IP:2375 $DISCOVERY_URL
|
||||||
ExecStop=/usr/bin/docker stop swarm-agent
|
ExecStop=/usr/bin/docker stop swarm-agent
|
||||||
|
|
|
@ -5,12 +5,13 @@ cat > /etc/systemd/system/swarm-manager.service << END_SERVICE_TOP
|
||||||
Description=Swarm Manager
|
Description=Swarm Manager
|
||||||
After=docker.service
|
After=docker.service
|
||||||
Requires=docker.service
|
Requires=docker.service
|
||||||
|
OnFailure=swarm-manager-failure.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
TimeoutStartSec=0
|
TimeoutStartSec=0
|
||||||
ExecStartPre=-/usr/bin/docker kill swarm-manager
|
ExecStartPre=-/usr/bin/docker kill swarm-manager
|
||||||
ExecStartPre=-/usr/bin/docker rm swarm-manager
|
ExecStartPre=-/usr/bin/docker rm swarm-manager
|
||||||
ExecStartPre=/usr/bin/docker pull swarm:0.2.0
|
ExecStartPre=-/usr/bin/docker pull swarm:0.2.0
|
||||||
#TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image
|
#TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image
|
||||||
ExecStart=/usr/bin/docker run --name swarm-manager \\
|
ExecStart=/usr/bin/docker run --name swarm-manager \\
|
||||||
-v /etc/docker:/etc/docker \\
|
-v /etc/docker:/etc/docker \\
|
||||||
|
|
|
@ -87,6 +87,18 @@ parameters:
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
|
|
||||||
|
cloud_init_wait_handle:
|
||||||
|
type: "AWS::CloudFormation::WaitConditionHandle"
|
||||||
|
|
||||||
|
cloud_init_wait_condition:
|
||||||
|
type: "AWS::CloudFormation::WaitCondition"
|
||||||
|
depends_on:
|
||||||
|
- swarm_master
|
||||||
|
properties:
|
||||||
|
Handle:
|
||||||
|
get_resource: cloud_init_wait_handle
|
||||||
|
Timeout: 6000
|
||||||
|
|
||||||
master_wait_handle:
|
master_wait_handle:
|
||||||
type: "AWS::CloudFormation::WaitConditionHandle"
|
type: "AWS::CloudFormation::WaitConditionHandle"
|
||||||
|
|
||||||
|
@ -179,7 +191,7 @@ resources:
|
||||||
str_replace:
|
str_replace:
|
||||||
template: {get_file: fragments/write-heat-params.yaml}
|
template: {get_file: fragments/write-heat-params.yaml}
|
||||||
params:
|
params:
|
||||||
"$WAIT_HANDLE": {get_resource: master_wait_handle}
|
"$WAIT_HANDLE": {get_resource: cloud_init_wait_handle}
|
||||||
"$HTTP_PROXY": {get_param: http_proxy}
|
"$HTTP_PROXY": {get_param: http_proxy}
|
||||||
"$HTTPS_PROXY": {get_param: https_proxy}
|
"$HTTPS_PROXY": {get_param: https_proxy}
|
||||||
"$NO_PROXY": {get_param: no_proxy}
|
"$NO_PROXY": {get_param: no_proxy}
|
||||||
|
@ -214,6 +226,28 @@ resources:
|
||||||
group: ungrouped
|
group: ungrouped
|
||||||
config: {get_file: fragments/write-docker-service.sh}
|
config: {get_file: fragments/write-docker-service.sh}
|
||||||
|
|
||||||
|
write_swarm_agent_failure_service:
|
||||||
|
type: "OS::Heat::SoftwareConfig"
|
||||||
|
properties:
|
||||||
|
group: ungrouped
|
||||||
|
config:
|
||||||
|
str_replace:
|
||||||
|
template: {get_file: fragments/write-bay-failure-service.yaml}
|
||||||
|
params:
|
||||||
|
"$SERVICE": swarm-agent
|
||||||
|
"$WAIT_HANDLE": {get_resource: agent_wait_handle}
|
||||||
|
|
||||||
|
write_swarm_manager_failure_service:
|
||||||
|
type: "OS::Heat::SoftwareConfig"
|
||||||
|
properties:
|
||||||
|
group: ungrouped
|
||||||
|
config:
|
||||||
|
str_replace:
|
||||||
|
template: {get_file: fragments/write-bay-failure-service.yaml}
|
||||||
|
params:
|
||||||
|
"$SERVICE": swarm-manager
|
||||||
|
"$WAIT_HANDLE": {get_resource: master_wait_handle}
|
||||||
|
|
||||||
write_docker_socket:
|
write_docker_socket:
|
||||||
type: "OS::Heat::SoftwareConfig"
|
type: "OS::Heat::SoftwareConfig"
|
||||||
properties:
|
properties:
|
||||||
|
@ -257,17 +291,14 @@ resources:
|
||||||
config:
|
config:
|
||||||
str_replace:
|
str_replace:
|
||||||
template: {get_file: fragments/enable-services.sh}
|
template: {get_file: fragments/enable-services.sh}
|
||||||
params: &node_services
|
params:
|
||||||
"$NODE_SERVICES": "docker.socket swarm-agent swarm-manager"
|
"$NODE_SERVICES": "docker.socket swarm-agent swarm-manager"
|
||||||
|
|
||||||
cfn_signal:
|
cfn_signal:
|
||||||
type: "OS::Heat::SoftwareConfig"
|
type: "OS::Heat::SoftwareConfig"
|
||||||
properties:
|
properties:
|
||||||
group: ungrouped
|
group: ungrouped
|
||||||
config:
|
config: {get_file: fragments/cfn-signal.sh}
|
||||||
str_replace:
|
|
||||||
template: {get_file: fragments/cfn-signal.sh}
|
|
||||||
params: *node_services
|
|
||||||
|
|
||||||
disable_selinux:
|
disable_selinux:
|
||||||
type: "OS::Heat::SoftwareConfig"
|
type: "OS::Heat::SoftwareConfig"
|
||||||
|
@ -289,6 +320,8 @@ resources:
|
||||||
- config: {get_resource: remove_docker_key}
|
- config: {get_resource: remove_docker_key}
|
||||||
- config: {get_resource: write_heat_params}
|
- config: {get_resource: write_heat_params}
|
||||||
- config: {get_resource: make_cert}
|
- config: {get_resource: make_cert}
|
||||||
|
- config: {get_resource: write_swarm_agent_failure_service}
|
||||||
|
- config: {get_resource: write_swarm_manager_failure_service}
|
||||||
- config: {get_resource: write_docker_service}
|
- config: {get_resource: write_docker_service}
|
||||||
- config: {get_resource: write_docker_socket}
|
- config: {get_resource: write_docker_socket}
|
||||||
- config: {get_resource: write_swarm_agent_service}
|
- config: {get_resource: write_swarm_agent_service}
|
||||||
|
|
|
@ -75,16 +75,16 @@ parameters:
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
|
|
||||||
node_wait_handle:
|
node_cloud_init_wait_handle:
|
||||||
type: "AWS::CloudFormation::WaitConditionHandle"
|
type: "AWS::CloudFormation::WaitConditionHandle"
|
||||||
|
|
||||||
node_wait_condition:
|
node_cloud_init_wait_condition:
|
||||||
type: "AWS::CloudFormation::WaitCondition"
|
type: "AWS::CloudFormation::WaitCondition"
|
||||||
depends_on:
|
depends_on:
|
||||||
- swarm_node
|
- swarm_node
|
||||||
properties:
|
properties:
|
||||||
Handle:
|
Handle:
|
||||||
get_resource: node_wait_handle
|
get_resource: node_cloud_init_wait_handle
|
||||||
Timeout: 6000
|
Timeout: 6000
|
||||||
|
|
||||||
node_agent_wait_handle:
|
node_agent_wait_handle:
|
||||||
|
@ -126,7 +126,7 @@ resources:
|
||||||
str_replace:
|
str_replace:
|
||||||
template: {get_file: fragments/write-heat-params.yaml}
|
template: {get_file: fragments/write-heat-params.yaml}
|
||||||
params:
|
params:
|
||||||
"$WAIT_HANDLE": {get_resource: node_wait_handle}
|
"$WAIT_HANDLE": {get_resource: node_cloud_init_wait_handle}
|
||||||
"$HTTP_PROXY": {get_param: http_proxy}
|
"$HTTP_PROXY": {get_param: http_proxy}
|
||||||
"$HTTPS_PROXY": {get_param: https_proxy}
|
"$HTTPS_PROXY": {get_param: https_proxy}
|
||||||
"$NO_PROXY": {get_param: no_proxy}
|
"$NO_PROXY": {get_param: no_proxy}
|
||||||
|
@ -167,6 +167,17 @@ resources:
|
||||||
group: ungrouped
|
group: ungrouped
|
||||||
config: {get_file: fragments/write-docker-socket.yaml}
|
config: {get_file: fragments/write-docker-socket.yaml}
|
||||||
|
|
||||||
|
write_swarm_agent_failure_service:
|
||||||
|
type: "OS::Heat::SoftwareConfig"
|
||||||
|
properties:
|
||||||
|
group: ungrouped
|
||||||
|
config:
|
||||||
|
str_replace:
|
||||||
|
template: {get_file: fragments/write-bay-failure-service.yaml}
|
||||||
|
params:
|
||||||
|
"$SERVICE": swarm-agent
|
||||||
|
"$WAIT_HANDLE": {get_resource: node_agent_wait_handle}
|
||||||
|
|
||||||
write_swarm_agent_service:
|
write_swarm_agent_service:
|
||||||
type: "OS::Heat::SoftwareConfig"
|
type: "OS::Heat::SoftwareConfig"
|
||||||
properties:
|
properties:
|
||||||
|
@ -189,17 +200,14 @@ resources:
|
||||||
config:
|
config:
|
||||||
str_replace:
|
str_replace:
|
||||||
template: {get_file: fragments/enable-services.sh}
|
template: {get_file: fragments/enable-services.sh}
|
||||||
params: &node_services
|
params:
|
||||||
"$NODE_SERVICES": "docker.socket swarm-agent"
|
"$NODE_SERVICES": "docker.socket swarm-agent"
|
||||||
|
|
||||||
cfn_signal:
|
cfn_signal:
|
||||||
type: "OS::Heat::SoftwareConfig"
|
type: "OS::Heat::SoftwareConfig"
|
||||||
properties:
|
properties:
|
||||||
group: ungrouped
|
group: ungrouped
|
||||||
config:
|
config: {get_file: fragments/cfn-signal.sh}
|
||||||
str_replace:
|
|
||||||
template: {get_file: fragments/cfn-signal.sh}
|
|
||||||
params: *node_services
|
|
||||||
|
|
||||||
disable_selinux:
|
disable_selinux:
|
||||||
type: "OS::Heat::SoftwareConfig"
|
type: "OS::Heat::SoftwareConfig"
|
||||||
|
@ -221,6 +229,7 @@ resources:
|
||||||
- config: {get_resource: remove_docker_key}
|
- config: {get_resource: remove_docker_key}
|
||||||
- config: {get_resource: write_heat_params}
|
- config: {get_resource: write_heat_params}
|
||||||
- config: {get_resource: make_cert}
|
- config: {get_resource: make_cert}
|
||||||
|
- config: {get_resource: write_swarm_agent_failure_service}
|
||||||
- config: {get_resource: write_swarm_agent_service}
|
- config: {get_resource: write_swarm_agent_service}
|
||||||
- config: {get_resource: write_docker_service}
|
- config: {get_resource: write_docker_service}
|
||||||
- config: {get_resource: write_docker_socket}
|
- config: {get_resource: write_docker_socket}
|
||||||
|
|
Loading…
Reference in New Issue