Merge "Fix swarm bay failure reporting"

This commit is contained in:
Jenkins 2015-10-06 13:26:42 +00:00 committed by Gerrit Code Review
commit 30d9ce3f81
6 changed files with 77 additions and 33 deletions

View File

@ -7,22 +7,6 @@ echo "notifying heat"
STATUS="SUCCESS"
REASON="Setup complete"
DATA="OK"
FAILED_SERVICE=""
for service in $NODE_SERVICES; do
echo "checking service status for $service"
systemctl status $service
if [[ $? -ne 0 ]]; then
echo "$service is not active, the cluster is not valid"
FAILED_SERVICE="$FAILED_SERVICE $service"
fi
done
if [[ -n $FAILED_SERVICE ]]; then
STATUS="FAILURE"
REASON="Setup failed, $FAILED_SERVICE not start up correctly."
DATA="Failed"
fi
data=$(echo '{"Status": "'${STATUS}'", "Reason": "'$REASON'", "Data": "'${DATA}'", "UniqueId": "00000"}')

View File

@ -0,0 +1,16 @@
#cloud-config
# Heat str_replace fragment. The including template substitutes $SERVICE
# (the systemd unit being watched) and $WAIT_HANDLE (the wait-condition URL)
# before cloud-init processes this file.
merge_how: dict(recurse_array)+list(append)
write_files:
  # Unit started through the watched service's OnFailure= setting. It PUTs a
  # FAILURE signal to the wait handle.
  - path: /etc/systemd/system/$SERVICE-failure.service
    owner: "root:root"
    permissions: "0644"
    content: |
      [Unit]
      Description=$SERVICE Failure Notifier
      [Service]
      Type=simple
      TimeoutStartSec=0
      ExecStart=/usr/bin/curl -sf -X PUT -H 'Content-Type: application/json' \
      --data-binary '{"Status": "FAILURE", "Reason": "$SERVICE service failed to start.", "Data": "Failed", "UniqueId": "00000"}' \
      "$WAIT_HANDLE"

View File

@ -9,12 +9,13 @@ write_files:
Description=Swarm Agent
After=docker.service
Requires=docker.service
OnFailure=swarm-agent-failure.service
[Service]
TimeoutStartSec=0
ExecStartPre=-/usr/bin/docker kill swarm-agent
ExecStartPre=-/usr/bin/docker rm swarm-agent
ExecStartPre=/usr/bin/docker pull swarm:0.2.0
ExecStartPre=-/usr/bin/docker pull swarm:0.2.0
#TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY -e https_proxy=$HTTPS_PROXY -e no_proxy=$NO_PROXY --name swarm-agent swarm:0.2.0 join --addr $NODE_IP:2375 $DISCOVERY_URL
ExecStop=/usr/bin/docker stop swarm-agent

View File

@ -5,12 +5,13 @@ cat > /etc/systemd/system/swarm-manager.service << END_SERVICE_TOP
Description=Swarm Manager
After=docker.service
Requires=docker.service
OnFailure=swarm-manager-failure.service
[Service]
TimeoutStartSec=0
ExecStartPre=-/usr/bin/docker kill swarm-manager
ExecStartPre=-/usr/bin/docker rm swarm-manager
ExecStartPre=/usr/bin/docker pull swarm:0.2.0
ExecStartPre=-/usr/bin/docker pull swarm:0.2.0
#TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image
ExecStart=/usr/bin/docker run --name swarm-manager \\
-v /etc/docker:/etc/docker \\

View File

@ -87,6 +87,18 @@ parameters:
resources:
# Signal handle referenced by cloud_init_wait_condition's Handle property.
cloud_init_wait_handle:
type: "AWS::CloudFormation::WaitConditionHandle"
# Wait condition bound to cloud_init_wait_handle. It is ordered after the
# swarm_master resource through depends_on.
cloud_init_wait_condition:
  type: "AWS::CloudFormation::WaitCondition"
  depends_on: [swarm_master]
  properties:
    Handle: {get_resource: cloud_init_wait_handle}
    Timeout: 6000
master_wait_handle:
type: "AWS::CloudFormation::WaitConditionHandle"
@ -179,7 +191,7 @@ resources:
str_replace:
template: {get_file: fragments/write-heat-params.yaml}
params:
"$WAIT_HANDLE": {get_resource: master_wait_handle}
"$WAIT_HANDLE": {get_resource: cloud_init_wait_handle}
"$HTTP_PROXY": {get_param: http_proxy}
"$HTTPS_PROXY": {get_param: https_proxy}
"$NO_PROXY": {get_param: no_proxy}
@ -214,6 +226,28 @@ resources:
group: ungrouped
config: {get_file: fragments/write-docker-service.sh}
# Renders fragments/write-bay-failure-service.yaml for the swarm-agent unit.
# Its $WAIT_HANDLE placeholder is filled from agent_wait_handle.
write_swarm_agent_failure_service:
  type: "OS::Heat::SoftwareConfig"
  properties:
    group: ungrouped
    config:
      str_replace:
        template:
          get_file: fragments/write-bay-failure-service.yaml
        params:
          "$SERVICE": swarm-agent
          "$WAIT_HANDLE":
            get_resource: agent_wait_handle
# Renders fragments/write-bay-failure-service.yaml for the swarm-manager unit.
# Its $WAIT_HANDLE placeholder is filled from master_wait_handle.
write_swarm_manager_failure_service:
  type: "OS::Heat::SoftwareConfig"
  properties:
    group: ungrouped
    config:
      str_replace:
        template:
          get_file: fragments/write-bay-failure-service.yaml
        params:
          "$SERVICE": swarm-manager
          "$WAIT_HANDLE":
            get_resource: master_wait_handle
write_docker_socket:
type: "OS::Heat::SoftwareConfig"
properties:
@ -257,17 +291,14 @@ resources:
config:
str_replace:
template: {get_file: fragments/enable-services.sh}
params: &node_services
params:
"$NODE_SERVICES": "docker.socket swarm-agent swarm-manager"
cfn_signal:
type: "OS::Heat::SoftwareConfig"
properties:
group: ungrouped
config:
str_replace:
template: {get_file: fragments/cfn-signal.sh}
params: *node_services
config: {get_file: fragments/cfn-signal.sh}
disable_selinux:
type: "OS::Heat::SoftwareConfig"
@ -289,6 +320,8 @@ resources:
- config: {get_resource: remove_docker_key}
- config: {get_resource: write_heat_params}
- config: {get_resource: make_cert}
- config: {get_resource: write_swarm_agent_failure_service}
- config: {get_resource: write_swarm_manager_failure_service}
- config: {get_resource: write_docker_service}
- config: {get_resource: write_docker_socket}
- config: {get_resource: write_swarm_agent_service}

View File

@ -75,16 +75,16 @@ parameters:
resources:
node_wait_handle:
node_cloud_init_wait_handle:
type: "AWS::CloudFormation::WaitConditionHandle"
node_wait_condition:
node_cloud_init_wait_condition:
type: "AWS::CloudFormation::WaitCondition"
depends_on:
- swarm_node
properties:
Handle:
get_resource: node_wait_handle
get_resource: node_cloud_init_wait_handle
Timeout: 6000
node_agent_wait_handle:
@ -126,7 +126,7 @@ resources:
str_replace:
template: {get_file: fragments/write-heat-params.yaml}
params:
"$WAIT_HANDLE": {get_resource: node_wait_handle}
"$WAIT_HANDLE": {get_resource: node_cloud_init_wait_handle}
"$HTTP_PROXY": {get_param: http_proxy}
"$HTTPS_PROXY": {get_param: https_proxy}
"$NO_PROXY": {get_param: no_proxy}
@ -167,6 +167,17 @@ resources:
group: ungrouped
config: {get_file: fragments/write-docker-socket.yaml}
# Renders fragments/write-bay-failure-service.yaml for the swarm-agent unit on
# a node. Its $WAIT_HANDLE placeholder is filled from node_agent_wait_handle.
write_swarm_agent_failure_service:
  type: "OS::Heat::SoftwareConfig"
  properties:
    group: ungrouped
    config:
      str_replace:
        template:
          get_file: fragments/write-bay-failure-service.yaml
        params:
          "$SERVICE": swarm-agent
          "$WAIT_HANDLE":
            get_resource: node_agent_wait_handle
write_swarm_agent_service:
type: "OS::Heat::SoftwareConfig"
properties:
@ -189,17 +200,14 @@ resources:
config:
str_replace:
template: {get_file: fragments/enable-services.sh}
params: &node_services
params:
"$NODE_SERVICES": "docker.socket swarm-agent"
cfn_signal:
type: "OS::Heat::SoftwareConfig"
properties:
group: ungrouped
config:
str_replace:
template: {get_file: fragments/cfn-signal.sh}
params: *node_services
config: {get_file: fragments/cfn-signal.sh}
disable_selinux:
type: "OS::Heat::SoftwareConfig"
@ -221,6 +229,7 @@ resources:
- config: {get_resource: remove_docker_key}
- config: {get_resource: write_heat_params}
- config: {get_resource: make_cert}
- config: {get_resource: write_swarm_agent_failure_service}
- config: {get_resource: write_swarm_agent_service}
- config: {get_resource: write_docker_service}
- config: {get_resource: write_docker_socket}