diff --git a/magnum/templates/docker-swarm/fragments/cfn-signal.sh b/magnum/templates/docker-swarm/fragments/cfn-signal.sh index 19993235d3..18b6e9a48c 100644 --- a/magnum/templates/docker-swarm/fragments/cfn-signal.sh +++ b/magnum/templates/docker-swarm/fragments/cfn-signal.sh @@ -7,22 +7,6 @@ echo "notifying heat" STATUS="SUCCESS" REASON="Setup complete" DATA="OK" -FAILED_SERVICE="" - -for service in $NODE_SERVICES; do - echo "checking service status for $service" - systemctl status $service - if [[ $? -ne 0 ]]; then - echo "$service is not active, the cluster is not valid" - FAILED_SERVICE="$FAILED_SERVICE $service" - fi -done - -if [[ -n $FAILED_SERVICE ]]; then - STATUS="FAILURE" - REASON="Setup failed, $FAILED_SERVICE not start up correctly." - DATA="Failed" -fi data=$(echo '{"Status": "'${STATUS}'", "Reason": "'$REASON'", "Data": "'${DATA}'", "UniqueId": "00000"}') diff --git a/magnum/templates/docker-swarm/fragments/write-bay-failure-service.yaml b/magnum/templates/docker-swarm/fragments/write-bay-failure-service.yaml new file mode 100644 index 0000000000..6dfde5eccc --- /dev/null +++ b/magnum/templates/docker-swarm/fragments/write-bay-failure-service.yaml @@ -0,0 +1,16 @@ +#cloud-config +merge_how: dict(recurse_array)+list(append) +write_files: + - path: /etc/systemd/system/$SERVICE-failure.service + owner: "root:root" + permissions: "0644" + content: | + [Unit] + Description=$SERVICE Failure Notifier + + [Service] + Type=simple + TimeoutStartSec=0 + ExecStart=/usr/bin/curl -sf -X PUT -H 'Content-Type: application/json' \ + --data-binary '{"Status": "FAILURE", "Reason": "$SERVICE service failed to start.", "Data": "OK", "UniqueId": "00000"}' \ + "$WAIT_HANDLE" diff --git a/magnum/templates/docker-swarm/fragments/write-swarm-agent-service.yaml b/magnum/templates/docker-swarm/fragments/write-swarm-agent-service.yaml index 85ca284f77..d459242e1c 100644 --- a/magnum/templates/docker-swarm/fragments/write-swarm-agent-service.yaml +++ b/magnum/templates/docker-swarm/fragments/write-swarm-agent-service.yaml @@ -9,12 +9,13 @@ write_files: Description=Swarm Agent After=docker.service Requires=docker.service + OnFailure=swarm-agent-failure.service [Service] TimeoutStartSec=0 ExecStartPre=-/usr/bin/docker kill swarm-agent ExecStartPre=-/usr/bin/docker rm swarm-agent - ExecStartPre=/usr/bin/docker pull swarm:0.2.0 + ExecStartPre=-/usr/bin/docker pull swarm:0.2.0 #TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY -e https_proxy=$HTTPS_PROXY -e no_proxy=$NO_PROXY --name swarm-agent swarm:0.2.0 join --addr $NODE_IP:2375 $DISCOVERY_URL ExecStop=/usr/bin/docker stop swarm-agent diff --git a/magnum/templates/docker-swarm/fragments/write-swarm-master-service.sh b/magnum/templates/docker-swarm/fragments/write-swarm-master-service.sh index bd3e99a16c..27956786d8 100644 --- a/magnum/templates/docker-swarm/fragments/write-swarm-master-service.sh +++ b/magnum/templates/docker-swarm/fragments/write-swarm-master-service.sh @@ -5,12 +5,13 @@ cat > /etc/systemd/system/swarm-manager.service << END_SERVICE_TOP Description=Swarm Manager After=docker.service Requires=docker.service +OnFailure=swarm-manager-failure.service [Service] TimeoutStartSec=0 ExecStartPre=-/usr/bin/docker kill swarm-manager ExecStartPre=-/usr/bin/docker rm swarm-manager -ExecStartPre=/usr/bin/docker pull swarm:0.2.0 +ExecStartPre=-/usr/bin/docker pull swarm:0.2.0 #TODO: roll-back from swarm:0.2.0 to swarm if atomic image can work with latest swarm image ExecStart=/usr/bin/docker run --name swarm-manager \\ -v /etc/docker:/etc/docker \\ diff --git a/magnum/templates/docker-swarm/swarm.yaml b/magnum/templates/docker-swarm/swarm.yaml index 11a579e3bc..62d2e16012 100644 --- a/magnum/templates/docker-swarm/swarm.yaml +++ b/magnum/templates/docker-swarm/swarm.yaml @@ -87,6 +87,18 @@ parameters: resources: + cloud_init_wait_handle: + type: "AWS::CloudFormation::WaitConditionHandle" + + cloud_init_wait_condition: + type: "AWS::CloudFormation::WaitCondition" + depends_on: + - swarm_master + properties: + Handle: + get_resource: cloud_init_wait_handle + Timeout: 6000 + master_wait_handle: type: "AWS::CloudFormation::WaitConditionHandle" @@ -179,7 +191,7 @@ resources: str_replace: template: {get_file: fragments/write-heat-params.yaml} params: - "$WAIT_HANDLE": {get_resource: master_wait_handle} + "$WAIT_HANDLE": {get_resource: cloud_init_wait_handle} "$HTTP_PROXY": {get_param: http_proxy} "$HTTPS_PROXY": {get_param: https_proxy} "$NO_PROXY": {get_param: no_proxy} @@ -214,6 +226,28 @@ resources: group: ungrouped config: {get_file: fragments/write-docker-service.sh} + write_swarm_agent_failure_service: + type: "OS::Heat::SoftwareConfig" + properties: + group: ungrouped + config: + str_replace: + template: {get_file: fragments/write-bay-failure-service.yaml} + params: + "$SERVICE": swarm-agent + "$WAIT_HANDLE": {get_resource: agent_wait_handle} + + write_swarm_manager_failure_service: + type: "OS::Heat::SoftwareConfig" + properties: + group: ungrouped + config: + str_replace: + template: {get_file: fragments/write-bay-failure-service.yaml} + params: + "$SERVICE": swarm-manager + "$WAIT_HANDLE": {get_resource: master_wait_handle} + write_docker_socket: type: "OS::Heat::SoftwareConfig" properties: @@ -257,17 +291,14 @@ resources: config: str_replace: template: {get_file: fragments/enable-services.sh} - params: &node_services + params: "$NODE_SERVICES": "docker.socket swarm-agent swarm-manager" cfn_signal: type: "OS::Heat::SoftwareConfig" properties: group: ungrouped - config: - str_replace: - template: {get_file: fragments/cfn-signal.sh} - params: *node_services + config: {get_file: fragments/cfn-signal.sh} disable_selinux: type: "OS::Heat::SoftwareConfig" @@ -289,6 +320,8 @@ resources: - config: {get_resource: remove_docker_key} - config: {get_resource: write_heat_params} - config: {get_resource: make_cert} + - config: {get_resource: write_swarm_agent_failure_service} + - config: {get_resource: write_swarm_manager_failure_service} - config: {get_resource: write_docker_service} - config: {get_resource: write_docker_socket} - config: {get_resource: write_swarm_agent_service} diff --git a/magnum/templates/docker-swarm/swarmnode.yaml b/magnum/templates/docker-swarm/swarmnode.yaml index 4deb30cd8a..229b164335 100644 --- a/magnum/templates/docker-swarm/swarmnode.yaml +++ b/magnum/templates/docker-swarm/swarmnode.yaml @@ -75,16 +75,16 @@ parameters: resources: - node_wait_handle: + node_cloud_init_wait_handle: type: "AWS::CloudFormation::WaitConditionHandle" - node_wait_condition: + node_cloud_init_wait_condition: type: "AWS::CloudFormation::WaitCondition" depends_on: - swarm_node properties: Handle: - get_resource: node_wait_handle + get_resource: node_cloud_init_wait_handle Timeout: 6000 node_agent_wait_handle: @@ -126,7 +126,7 @@ resources: str_replace: template: {get_file: fragments/write-heat-params.yaml} params: - "$WAIT_HANDLE": {get_resource: node_wait_handle} + "$WAIT_HANDLE": {get_resource: node_cloud_init_wait_handle} "$HTTP_PROXY": {get_param: http_proxy} "$HTTPS_PROXY": {get_param: https_proxy} "$NO_PROXY": {get_param: no_proxy} @@ -167,6 +167,17 @@ resources: group: ungrouped config: {get_file: fragments/write-docker-socket.yaml} + write_swarm_agent_failure_service: + type: "OS::Heat::SoftwareConfig" + properties: + group: ungrouped + config: + str_replace: + template: {get_file: fragments/write-bay-failure-service.yaml} + params: + "$SERVICE": swarm-agent + "$WAIT_HANDLE": {get_resource: node_agent_wait_handle} + write_swarm_agent_service: type: "OS::Heat::SoftwareConfig" properties: @@ -189,17 +200,14 @@ resources: config: str_replace: template: {get_file: fragments/enable-services.sh} - params: &node_services + params: "$NODE_SERVICES": "docker.socket swarm-agent" cfn_signal: type: "OS::Heat::SoftwareConfig" properties: group: ungrouped - config: - str_replace: - template: {get_file: fragments/cfn-signal.sh} - params: *node_services + config: {get_file: fragments/cfn-signal.sh} disable_selinux: type: "OS::Heat::SoftwareConfig" @@ -221,6 +229,7 @@ resources: - config: {get_resource: remove_docker_key} - config: {get_resource: write_heat_params} - config: {get_resource: make_cert} + - config: {get_resource: write_swarm_agent_failure_service} - config: {get_resource: write_swarm_agent_service} - config: {get_resource: write_docker_service} - config: {get_resource: write_docker_socket}