Gate: Fix docker swarm disconnect issue
The swarm functional test occasionally failed with the error below. The error cannot be reproduced deterministically. After some experiments, it seems that swarm aborts connections while a new swarm agent is being registered. ConnectionError: ('Connection aborted.', BadStatusLine("''",)) This commit tries to fix the issue by waiting for agent registration to complete. After the swarm agent service starts, it checks etcd to ensure the agent was successfully registered before sending a signal to Heat to indicate success. Closes-Bug: #1521395 Change-Id: Iec1772d1df7d85e367676758b1f97a5b604c0eb7
This commit is contained in:
parent
1cb84d0fd4
commit
8733cd37fa
|
@ -23,3 +23,5 @@ write_files:
|
|||
FLANNEL_USE_VXLAN="$FLANNEL_USE_VXLAN"
|
||||
ETCD_SERVER_IP="$ETCD_SERVER_IP"
|
||||
API_IP_ADDRESS="$API_IP_ADDRESS"
|
||||
SWARM_VERSION="$SWARM_VERSION"
|
||||
AGENT_WAIT_HANDLE="$AGENT_WAIT_HANDLE"
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
#!/bin/sh
|
||||
|
||||
. /etc/sysconfig/heat-params
|
||||
|
||||
myip=$(ip addr show eth0 |
|
||||
awk '$1 == "inet" {print $2}' | cut -f1 -d/)
|
||||
|
||||
CONF_FILE=/etc/systemd/system/swarm-agent.service
|
||||
|
||||
cat > $CONF_FILE << EOF
|
||||
[Unit]
|
||||
Description=Swarm Agent
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
OnFailure=swarm-agent-failure.service
|
||||
|
||||
[Service]
|
||||
TimeoutStartSec=0
|
||||
ExecStartPre=-/usr/bin/docker kill swarm-agent
|
||||
ExecStartPre=-/usr/bin/docker rm swarm-agent
|
||||
ExecStartPre=-/usr/bin/docker pull swarm:$SWARM_VERSION
|
||||
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY \\
|
||||
-e https_proxy=$HTTPS_PROXY \\
|
||||
-e no_proxy=$NO_PROXY \\
|
||||
--name swarm-agent \\
|
||||
swarm:$SWARM_VERSION \\
|
||||
join \\
|
||||
--addr $myip:2375 \\
|
||||
etcd://$ETCD_SERVER_IP:2379/v2/keys/swarm/
|
||||
ExecStop=/usr/bin/docker stop swarm-agent
|
||||
ExecStartPost=/usr/local/bin/notify-heat
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
chown root:root $CONF_FILE
|
||||
chmod 644 $CONF_FILE
|
||||
|
||||
SCRIPT=/usr/local/bin/notify-heat
|
||||
|
||||
cat > $SCRIPT << EOF
|
||||
#!/bin/sh
|
||||
until etcdctl --peers $ETCD_SERVER_IP:2379 ls /v2/keys/swarm/docker/swarm/nodes/$myip:2375
|
||||
do
|
||||
echo "Waiting for swarm agent registration..."
|
||||
sleep 5
|
||||
done
|
||||
|
||||
curl -sf -X PUT -H 'Content-Type: application/json' \
|
||||
--data-binary '{"Status": "SUCCESS", "Reason": "Swarm agent ready", "Data": "OK", "UniqueId": "00000"}' \
|
||||
"$AGENT_WAIT_HANDLE"
|
||||
EOF
|
||||
|
||||
chown root:root $SCRIPT
|
||||
chmod 755 $SCRIPT
|
|
@ -1,26 +0,0 @@
|
|||
#cloud-config
|
||||
merge_how: dict(recurse_array)+list(append)
|
||||
write_files:
|
||||
- path: /etc/systemd/system/swarm-agent.service
|
||||
owner: "root:root"
|
||||
permissions: "0644"
|
||||
content: |
|
||||
[Unit]
|
||||
Description=Swarm Agent
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
OnFailure=swarm-agent-failure.service
|
||||
|
||||
[Service]
|
||||
TimeoutStartSec=0
|
||||
ExecStartPre=-/usr/bin/docker kill swarm-agent
|
||||
ExecStartPre=-/usr/bin/docker rm swarm-agent
|
||||
ExecStartPre=-/usr/bin/docker pull swarm:$SWARM_VERSION
|
||||
ExecStart=/usr/bin/docker run -e http_proxy=$HTTP_PROXY -e https_proxy=$HTTPS_PROXY -e no_proxy=$NO_PROXY --name swarm-agent swarm:$SWARM_VERSION join --addr $NODE_IP:2375 etcd://$ETCD_SERVER_IP:2379/v2/keys/swarm/
|
||||
ExecStop=/usr/bin/docker stop swarm-agent
|
||||
ExecStartPost=/usr/bin/curl -sf -X PUT -H 'Content-Type: application/json' \
|
||||
--data-binary '{"Status": "SUCCESS", "Reason": "Setup complete", "Data": "OK", "UniqueId": "00000"}' \
|
||||
"$WAIT_HANDLE"
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
|
@ -3,8 +3,8 @@
|
|||
cat > /etc/systemd/system/swarm-manager.service << END_SERVICE_TOP
|
||||
[Unit]
|
||||
Description=Swarm Manager
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
After=docker.service etcd.service
|
||||
Requires=docker.service etcd.service
|
||||
OnFailure=swarm-manager-failure.service
|
||||
|
||||
[Service]
|
||||
|
|
|
@ -145,6 +145,8 @@ resources:
|
|||
"$NETWORK_DRIVER": {get_param: network_driver}
|
||||
"$ETCD_SERVER_IP": {get_param: etcd_server_ip}
|
||||
"$API_IP_ADDRESS": {get_param: api_ip_address}
|
||||
"$SWARM_VERSION": {get_param: swarm_version}
|
||||
"$AGENT_WAIT_HANDLE": {get_resource: node_agent_wait_handle}
|
||||
|
||||
configure_swarm:
|
||||
type: "OS::Heat::SoftwareConfig"
|
||||
|
@ -203,18 +205,7 @@ resources:
|
|||
type: "OS::Heat::SoftwareConfig"
|
||||
properties:
|
||||
group: ungrouped
|
||||
config:
|
||||
str_replace:
|
||||
template: {get_file: fragments/write-swarm-agent-service.yaml}
|
||||
params:
|
||||
"$NODE_IP": {get_attr: [swarm_node_eth0, fixed_ips, 0, ip_address]}
|
||||
"$DISCOVERY_URL": {get_param: discovery_url}
|
||||
"$WAIT_HANDLE": {get_resource: node_agent_wait_handle}
|
||||
"$HTTP_PROXY": {get_param: http_proxy}
|
||||
"$HTTPS_PROXY": {get_param: https_proxy}
|
||||
"$NO_PROXY": {get_param: no_proxy}
|
||||
"$SWARM_VERSION": {get_param: swarm_version}
|
||||
"$ETCD_SERVER_IP": {get_param: etcd_server_ip}
|
||||
config: {get_file: fragments/write-swarm-agent-service.sh}
|
||||
|
||||
enable_services:
|
||||
type: "OS::Heat::SoftwareConfig"
|
||||
|
|
Loading…
Reference in New Issue