Gate: Fix docker swarm disconnect issue

The swarm functional test occasionally failed with the error below.
The failure cannot be reproduced deterministically. After some
experiments, it appears that swarm aborts connections while a new
swarm agent is being registered.

ConnectionError: ('Connection aborted.', BadStatusLine("''",))

This commit tries to fix the issue by waiting for agent registration
to complete. After the swarm agent service starts, it checks ETCD to
ensure the agent was successfully registered before sending a signal
to Heat to indicate success.

Closes-Bug: #1521395
Change-Id: Iec1772d1df7d85e367676758b1f97a5b604c0eb7
This commit is contained in:
Hongbin Lu 2015-12-06 16:03:09 -05:00
parent 1cb84d0fd4
commit 8733cd37fa
5 changed files with 63 additions and 40 deletions

View File

@ -23,3 +23,5 @@ write_files:
FLANNEL_USE_VXLAN="$FLANNEL_USE_VXLAN"
ETCD_SERVER_IP="$ETCD_SERVER_IP"
API_IP_ADDRESS="$API_IP_ADDRESS"
SWARM_VERSION="$SWARM_VERSION"
AGENT_WAIT_HANDLE="$AGENT_WAIT_HANDLE"

View File

@ -0,0 +1,56 @@
#!/bin/sh
# Generate two files on a swarm node:
#   1. /etc/systemd/system/swarm-agent.service - runs the swarm agent
#      container and joins it to the etcd-backed discovery endpoint.
#   2. /usr/local/bin/notify-heat - run as ExecStartPost of that unit; it
#      blocks until the agent's key shows up in etcd and only then signals
#      the Heat wait handle.
#
# Both heredocs below are unquoted, so every $VAR in them is expanded NOW
# (when this script runs), not when the generated file is later used.

# Provides SWARM_VERSION, ETCD_SERVER_IP and AGENT_WAIT_HANDLE.
# NOTE(review): HTTP_PROXY / HTTPS_PROXY / NO_PROXY are presumably defined
# there as well - confirm against the heat-params fragment.
. /etc/sysconfig/heat-params

# IPv4 address of eth0 with the prefix length stripped. awk exits after the
# first "inet" line so that an interface carrying several IPv4 addresses
# still yields exactly one address; a multi-line value would corrupt both
# the --addr argument in the unit file and the etcd key polled below.
myip=$(ip addr show eth0 |
awk '$1 == "inet" {print $2; exit}' | cut -f1 -d/)

CONF_FILE=/etc/systemd/system/swarm-agent.service

# "\\" inside an unquoted heredoc is written out as a single "\", which
# systemd then treats as a line continuation of ExecStart.
# TimeoutStartSec=0 disables the start timeout; ExecStartPost may wait for
# an unbounded time for the agent to register.
# A leading "-" on ExecStartPre tells systemd to ignore that command's
# failure (e.g. no stale container to kill or remove).
cat > "$CONF_FILE" << EOF
[Unit]
Description=Swarm Agent
After=docker.service
Requires=docker.service
OnFailure=swarm-agent-failure.service
[Service]
TimeoutStartSec=0
ExecStartPre=-/usr/bin/docker kill swarm-agent
ExecStartPre=-/usr/bin/docker rm swarm-agent
ExecStartPre=-/usr/bin/docker pull swarm:$SWARM_VERSION
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY \\
-e https_proxy=$HTTPS_PROXY \\
-e no_proxy=$NO_PROXY \\
--name swarm-agent \\
swarm:$SWARM_VERSION \\
join \\
--addr $myip:2375 \\
etcd://$ETCD_SERVER_IP:2379/v2/keys/swarm/
ExecStop=/usr/bin/docker stop swarm-agent
ExecStartPost=/usr/local/bin/notify-heat
[Install]
WantedBy=multi-user.target
EOF
chown root:root "$CONF_FILE"
chmod 644 "$CONF_FILE"

SCRIPT=/usr/local/bin/notify-heat

# Here a single "\" before the newline is consumed by the shell while the
# heredoc is read, so the curl invocation lands in the generated script as
# one long line. The etcd endpoint, node key and wait-handle URL are baked
# into the generated script as literal values.
cat > "$SCRIPT" << EOF
#!/bin/sh
until etcdctl --peers $ETCD_SERVER_IP:2379 ls /v2/keys/swarm/docker/swarm/nodes/$myip:2375
do
echo "Waiting for swarm agent registration..."
sleep 5
done
curl -sf -X PUT -H 'Content-Type: application/json' \
--data-binary '{"Status": "SUCCESS", "Reason": "Swarm agent ready", "Data": "OK", "UniqueId": "00000"}' \
"$AGENT_WAIT_HANDLE"
EOF
chown root:root "$SCRIPT"
chmod 755 "$SCRIPT"

View File

@ -1,26 +0,0 @@
#cloud-config
# Cloud-config fragment that writes the swarm-agent systemd unit to
# /etc/systemd/system/swarm-agent.service (owner root:root, mode 0644).
#
# The $SWARM_VERSION, $HTTP_PROXY, $HTTPS_PROXY, $NO_PROXY, $NODE_IP,
# $ETCD_SERVER_IP and $WAIT_HANDLE tokens in the unit body are template
# placeholders; they must be substituted before this fragment reaches the
# node (the Heat template does this with str_replace).
#
# The unit starts the swarm agent container joined to the etcd discovery
# URL and, via ExecStartPost, PUTs a SUCCESS status to $WAIT_HANDLE with
# curl. Nothing in this unit checks that the agent has registered in etcd
# before that signal is sent.
merge_how: dict(recurse_array)+list(append)
write_files:
- path: /etc/systemd/system/swarm-agent.service
owner: "root:root"
permissions: "0644"
content: |
[Unit]
Description=Swarm Agent
After=docker.service
Requires=docker.service
OnFailure=swarm-agent-failure.service
[Service]
TimeoutStartSec=0
ExecStartPre=-/usr/bin/docker kill swarm-agent
ExecStartPre=-/usr/bin/docker rm swarm-agent
ExecStartPre=-/usr/bin/docker pull swarm:$SWARM_VERSION
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY -e https_proxy=$HTTPS_PROXY -e no_proxy=$NO_PROXY --name swarm-agent swarm:$SWARM_VERSION join --addr $NODE_IP:2375 etcd://$ETCD_SERVER_IP:2379/v2/keys/swarm/
ExecStop=/usr/bin/docker stop swarm-agent
ExecStartPost=/usr/bin/curl -sf -X PUT -H 'Content-Type: application/json' \
--data-binary '{"Status": "SUCCESS", "Reason": "Setup complete", "Data": "OK", "UniqueId": "00000"}' \
"$WAIT_HANDLE"
[Install]
WantedBy=multi-user.target

View File

@ -3,8 +3,8 @@
cat > /etc/systemd/system/swarm-manager.service << END_SERVICE_TOP
[Unit]
Description=Swarm Manager
After=docker.service
Requires=docker.service
After=docker.service etcd.service
Requires=docker.service etcd.service
OnFailure=swarm-manager-failure.service
[Service]

View File

@ -145,6 +145,8 @@ resources:
"$NETWORK_DRIVER": {get_param: network_driver}
"$ETCD_SERVER_IP": {get_param: etcd_server_ip}
"$API_IP_ADDRESS": {get_param: api_ip_address}
"$SWARM_VERSION": {get_param: swarm_version}
"$AGENT_WAIT_HANDLE": {get_resource: node_agent_wait_handle}
configure_swarm:
type: "OS::Heat::SoftwareConfig"
@ -203,18 +205,7 @@ resources:
type: "OS::Heat::SoftwareConfig"
properties:
group: ungrouped
config:
str_replace:
template: {get_file: fragments/write-swarm-agent-service.yaml}
params:
"$NODE_IP": {get_attr: [swarm_node_eth0, fixed_ips, 0, ip_address]}
"$DISCOVERY_URL": {get_param: discovery_url}
"$WAIT_HANDLE": {get_resource: node_agent_wait_handle}
"$HTTP_PROXY": {get_param: http_proxy}
"$HTTPS_PROXY": {get_param: https_proxy}
"$NO_PROXY": {get_param: no_proxy}
"$SWARM_VERSION": {get_param: swarm_version}
"$ETCD_SERVER_IP": {get_param: etcd_server_ip}
config: {get_file: fragments/write-swarm-agent-service.sh}
enable_services:
type: "OS::Heat::SoftwareConfig"