Run etcd on all controllers for redundancy
Change-Id: If8d8d10691d80f5d46dfb8dcf23ae1779b028309
This commit is contained in:
parent
f8ae3ffa13
commit
8f0d8d149b
|
@ -6,28 +6,33 @@ import yaml
|
|||
|
||||
from pluginutils import NODES_CONFIG
|
||||
|
||||
RECONFIGURE_ROUTE_REFLECTOR = "##REPLACE_ON_INSTALL##/calico_route_reflector.sh"
|
||||
SCRIPTS_LOCATION="##REPLACE_ON_INSTALL##/"
|
||||
RECONFIGURE_ROUTE_REFLECTOR = SCRIPTS_LOCATION + "calico_route_reflector.sh"
|
||||
UPDATE_ETCD_CLUSTER = SCRIPTS_LOCATION + "update_etcd_cluster.sh"
|
||||
|
||||
|
||||
def _get_configured_nodes():
|
||||
def _get_configured_nodes(roles):
|
||||
with open(NODES_CONFIG, "r") as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
nodes = [node for node in config["nodes"] if node["role"] in [
|
||||
"compute", "controller", "primary-controller"]]
|
||||
return [node for node in config["nodes"] if node["role"] in roles]
|
||||
|
||||
# There is no need to reconfigure the route reflector for a change in
|
||||
# primary controller, so we don't keep track of which controller is the
|
||||
# current primary.
|
||||
primary_controller_index = None
|
||||
|
||||
def _get_compute_nodes():
|
||||
return _get_configured_nodes(["compute"])
|
||||
|
||||
|
||||
def _get_control_nodes():
|
||||
nodes = _get_configured_nodes(["controller", "primary-controller"])
|
||||
|
||||
primary_index = None
|
||||
for (index, node) in enumerate(nodes):
|
||||
if node["role"] == "primary-controller":
|
||||
primary_controller_index = index
|
||||
primary_index = index
|
||||
break
|
||||
|
||||
# Note the index could be 0 - hence 'if x is not None' rather than 'if x'
|
||||
if primary_controller_index is not None:
|
||||
nodes[primary_controller_index]["role"] = "controller"
|
||||
if primary_index is not None:
|
||||
nodes[primary_index]["role"] = "controller"
|
||||
|
||||
return nodes
|
||||
|
||||
|
@ -35,13 +40,22 @@ def _get_configured_nodes():
|
|||
class DeploymentChangeHandler(pyinotify.ProcessEvent):
|
||||
def __init__(self):
|
||||
super(DeploymentChangeHandler, self).__init__()
|
||||
self.nodes = _get_configured_nodes()
|
||||
self.compute_nodes = _get_compute_nodes()
|
||||
self.control_nodes = _get_control_nodes()
|
||||
|
||||
def process_IN_MODIFY(self, event):
|
||||
current_nodes = _get_configured_nodes()
|
||||
if current_nodes != self.nodes:
|
||||
current_compute_nodes = _get_compute_nodes()
|
||||
current_control_nodes = _get_control_nodes()
|
||||
|
||||
if current_control_nodes != self.control_nodes:
|
||||
subprocess.call(RECONFIGURE_ROUTE_REFLECTOR)
|
||||
self.nodes = current_nodes
|
||||
subprocess.call(UPDATE_ETCD_CLUSTER)
|
||||
|
||||
elif current_compute_nodes != self.compute_nodes:
|
||||
subprocess.call(RECONFIGURE_ROUTE_REFLECTOR)
|
||||
|
||||
self.compute_nodes = current_compute_nodes
|
||||
self.control_nodes = current_control_nodes
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -52,6 +52,11 @@ apt-get update
|
|||
|
||||
apt-get -y install etcd
|
||||
|
||||
for controller_address in ${controller_node_addresses[@]};do
|
||||
initial_cluster+="${controller_address}=http://${controller_address}:2380,"
|
||||
done
|
||||
initial_cluster=${initial_cluster::-1} # remove trailing comma
|
||||
|
||||
service etcd stop
|
||||
rm -rf /var/lib/etcd/*
|
||||
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
|
||||
|
@ -60,7 +65,7 @@ cat << EXEC_CMD >> /etc/init/etcd.conf
|
|||
exec /usr/bin/etcd -proxy on \\
|
||||
-listen-client-urls http://127.0.0.1:4001 \\
|
||||
-advertise-client-urls http://127.0.0.1:7001 \\
|
||||
-initial-cluster controller=http://${controller_node_addresses}:2380
|
||||
-initial-cluster ${initial_cluster}
|
||||
EXEC_CMD
|
||||
service etcd start
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ set -x
|
|||
echo "Hi, I'm a controller node!"
|
||||
|
||||
this_node_address=$(python get_node_ip.py `hostname`)
|
||||
controller_node_addresses=$(python get_node_ips_by_role.py controller)
|
||||
|
||||
# Get APT key for binaries.projectcalico.org.
|
||||
|
||||
|
@ -51,19 +52,25 @@ apt-get update
|
|||
|
||||
apt-get -y install etcd
|
||||
|
||||
for controller_address in ${controller_node_addresses[@]};do
|
||||
initial_cluster+="${controller_address}=http://${controller_address}:2380,"
|
||||
done
|
||||
initial_cluster=${initial_cluster::-1} # remove trailing comma
|
||||
|
||||
service etcd stop
|
||||
rm -rf /var/lib/etcd/*
|
||||
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
|
||||
mv tmp /etc/init/etcd.conf
|
||||
cat << EXEC_CMD >> /etc/init/etcd.conf
|
||||
exec /usr/bin/etcd -name controller \\
|
||||
exec /usr/bin/etcd -name ${this_node_address} \\
|
||||
-advertise-client-urls "http://${this_node_address}:2379,http://${this_node_address}:4001" \\
|
||||
-listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001" \\
|
||||
-listen-peer-urls "http://0.0.0.0:2380" \\
|
||||
-initial-advertise-peer-urls "http://${this_node_address}:2380" \\
|
||||
-initial-cluster-token fuel-cluster-1 \\
|
||||
-initial-cluster controller=http://${this_node_address}:2380 \\
|
||||
-initial-cluster ${initial_cluster} \\
|
||||
-initial-cluster-state new
|
||||
|
||||
EXEC_CMD
|
||||
|
||||
service etcd start
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
# Copyright 2015 Metaswitch Networks
#
# Rewrite the etcd upstart job on this machine so that etcd joins an existing
# cluster, then restart etcd.
#
# Arguments:
#   $1  caller          - identifier of whoever invoked the script; it is
#                         recorded in the marker file and used for the
#                         tiebreak between concurrent invocations.
#   $2  node_address    - address used as the etcd member name and in the
#                         advertised client/peer URLs.
#   $3  initial_cluster - value passed verbatim to etcd's -initial-cluster.
#
# The script does its work at most once: as soon as the marker file contains
# any line, later invocations exit without touching etcd.

caller=$1
node_address=$2
initial_cluster=$3

# Marker file holding one line per caller that got past the first check.
CALLED_BY=/tmp/etcd_cfg_modifiers
touch "${CALLED_BY}"

num_callers=$(wc -l < "${CALLED_BY}")
# Numeric test: inside [[ ]] the != / > operators compare strings, not numbers.
if [[ ${num_callers} -ne 0 ]]; then
  # Someone else has already run this script - exit.
  exit 0
fi

printf '%s\n' "${caller}" >> "${CALLED_BY}"

# Leave a window for concurrent invocations to register themselves too.
sleep 1

num_callers=$(wc -l < "${CALLED_BY}")
if [[ ${num_callers} -gt 1 ]]; then
  # Someone else is also trying to run this script, back off unless the
  # caller wins an arbitrary tiebreak of an alphabetical sort.
  # Take only the first sorted line: a plain $(...) capture is a single
  # string, so indexing it with [0] would yield the whole list and the
  # comparison below could never succeed for any caller.
  first_caller=$(sort "${CALLED_BY}" | head -n 1)
  if [[ "${caller}" != "${first_caller}" ]]; then
    exit 0
  fi
fi

service etcd stop
rm -rf /var/lib/etcd/*

# Drop the current "exec /usr/bin/etcd" stanza from the upstart job: awk skips
# from the exec line up to the next blank line (or end of file) and prints
# everything else unchanged.
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
mv tmp /etc/init/etcd.conf

# Append the replacement stanza. The heredoc delimiter is unquoted on purpose
# so the variables expand and each "\\" is written out as a single "\" line
# continuation. The trailing blank line terminates the stanza for the awk
# clean-up above; without it a later clean-up would stop at end of file and
# re-print the last option line as a stray line in the job file.
cat << EXEC_CMD >> /etc/init/etcd.conf
exec /usr/bin/etcd -name ${node_address} \\
-advertise-client-urls "http://${node_address}:2379,http://${node_address}:4001" \\
-listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001" \\
-listen-peer-urls "http://0.0.0.0:2380" \\
-initial-advertise-peer-urls "http://${node_address}:2380" \\
-initial-cluster-token fuel-cluster-1 \\
-initial-cluster ${initial_cluster} \\
-initial-cluster-state existing

EXEC_CMD
service etcd start
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
#!/bin/bash
# Copyright 2015 Metaswitch Networks
#
# Rebuild the etcd upstart job on this node so that etcd starts as a member of
# a new cluster made up of every node with the "controller" role, then start
# etcd and wait for the cluster to report healthy.
#
# Takes no arguments. The helper scripts get_node_ip.py and
# get_node_ips_by_role.py are invoked by relative path, so the script has to
# be run from the directory that contains them.

this_node_address=$(python get_node_ip.py "$(hostname)")
controller_node_addresses=$(python get_node_ips_by_role.py controller)

# Build "addr=http://addr:2380,addr=http://addr:2380,...". Start from an empty
# string so a value inherited from the environment cannot leak into the list.
initial_cluster=""
# Unquoted on purpose: the helper's output is split on whitespace into one
# address per iteration.
for node_address in ${controller_node_addresses}; do
  initial_cluster+="${node_address}=http://${node_address}:2380,"
done

if [[ -z "${initial_cluster}" ]]; then
  # Nothing to cluster with. Stop before etcd's data and job file are touched
  # rather than writing an empty -initial-cluster option.
  echo "No controller node addresses found; leaving etcd untouched." >&2
  exit 1
fi

initial_cluster=${initial_cluster%,} # remove trailing comma

service etcd stop
rm -rf /var/lib/etcd/*

# Drop the current "exec /usr/bin/etcd" stanza from the upstart job: awk skips
# from the exec line up to the next blank line (or end of file) and prints
# everything else unchanged.
awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
mv tmp /etc/init/etcd.conf

# Append the replacement stanza. The heredoc delimiter is unquoted on purpose
# so the variables expand and each "\\" is written out as a single "\" line
# continuation. The blank line before the delimiter marks the end of the
# stanza for the awk clean-up above.
cat << EXEC_CMD >> /etc/init/etcd.conf
exec /usr/bin/etcd -name ${this_node_address} \\
-advertise-client-urls "http://${this_node_address}:2379,http://${this_node_address}:4001" \\
-listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001" \\
-listen-peer-urls "http://0.0.0.0:2380" \\
-initial-advertise-peer-urls "http://${this_node_address}:2380" \\
-initial-cluster-token fuel-cluster-1 \\
-initial-cluster ${initial_cluster} \\
-initial-cluster-state new

EXEC_CMD
service etcd start

# Check cluster health up to five times, restarting etcd and pausing two
# seconds after each failed check. Arithmetic context is used because < inside
# [[ ]] compares strings lexicographically rather than numerically.
retry_count=0
while (( retry_count < 5 )); do
  if etcdctl cluster-health; then
    break
  fi
  retry_count=$((retry_count + 1))
  service etcd restart
  sleep 2
done
|
||||
|
Loading…
Reference in New Issue