Browse Source

Add support for multi-controller HA deployments.

Change-Id: I819cec71cdbc8df7a85bd4f41f36493b34c8bdcc
Emma Gordon 3 years ago
parent
commit
e6c8fab962

+ 35
- 9
deployment_scripts/calico-fuel-monitor View File

@@ -6,29 +6,55 @@ import yaml
6 6
 
7 7
 from pluginutils import NODES_CONFIG
8 8
 
9
-RECONFIGURE_ROUTE_REFLECTOR = "##REPLACE_ON_INSTALL##/calico_route_reflector.sh"
9
+SCRIPTS_LOCATION = "##REPLACE_ON_INSTALL##/"
10
+RECONFIGURE_ROUTE_REFLECTOR = SCRIPTS_LOCATION + "calico_route_reflector.sh"
11
+UPDATE_ETCD_CLUSTER = SCRIPTS_LOCATION + "update_etcd_cluster.sh"
10 12
 
11 13
 
12
-def _get_configured_compute_nodes():
14
+def _get_configured_nodes(roles):
13 15
     with open(NODES_CONFIG, "r") as f:
14 16
         config = yaml.safe_load(f)
15 17
 
16
-    compute_nodes = [node for node in config["nodes"]
17
-                     if node["role"] == "compute"]
18
+    return [node for node in config["nodes"] if node["role"] in roles]
18 19
 
19
-    return compute_nodes
20
+
21
+def _get_compute_nodes():
22
+    return _get_configured_nodes(["compute"])
23
+
24
+
25
+def _get_control_nodes():
26
+    nodes = _get_configured_nodes(["controller", "primary-controller"])
27
+
28
+    for node in nodes:
29
+        # Note this does not change the node role in the Fuel deployment, just
30
+        # in the list of nodes internal to this script (where we are only
31
+        # concerned with the distinction between compute/control nodes, not
32
+        # whether a given control node is primary or not).
33
+        if node["role"] == "primary-controller":
34
+            node["role"] = "controller"
35
+
36
+    return nodes
20 37
 
21 38
 
22 39
 class DeploymentChangeHandler(pyinotify.ProcessEvent):
23 40
     def __init__(self):
24 41
         super(DeploymentChangeHandler, self).__init__()
25
-        self.compute_nodes = _get_configured_compute_nodes()
42
+        self.compute_nodes = _get_compute_nodes()
43
+        self.control_nodes = _get_control_nodes()
26 44
 
27 45
     def process_IN_MODIFY(self, event):
28
-        current_compute_nodes = _get_configured_compute_nodes()
29
-        if current_compute_nodes != self.compute_nodes:
46
+        current_compute_nodes = _get_compute_nodes()
47
+        current_control_nodes = _get_control_nodes()
48
+
49
+        if current_control_nodes != self.control_nodes:
50
+            subprocess.call(RECONFIGURE_ROUTE_REFLECTOR)
51
+            subprocess.call(UPDATE_ETCD_CLUSTER)
52
+
53
+        elif current_compute_nodes != self.compute_nodes:
30 54
             subprocess.call(RECONFIGURE_ROUTE_REFLECTOR)
31
-            self.compute_nodes = current_compute_nodes
55
+
56
+        self.compute_nodes = current_compute_nodes
57
+        self.control_nodes = current_control_nodes
32 58
 
33 59
 
34 60
 if __name__ == "__main__":

+ 9
- 3
deployment_scripts/calico_compute.sh View File

@@ -10,7 +10,7 @@ set -x
10 10
 echo "Hi, I'm a compute node!"
11 11
 
12 12
 this_node_address=$(python get_node_ip.py `hostname`)
13
-controller_node_address=$(python get_controller_ip.py)
13
+controller_node_addresses=$(python get_node_ips_by_role.py controller)
14 14
 
15 15
 # Get APT key for binaries.projectcalico.org.
16 16
 
@@ -52,6 +52,12 @@ apt-get update
52 52
 
53 53
 apt-get -y install etcd
54 54
 
55
+for controller_address in ${controller_node_addresses[@]}
56
+do
57
+  initial_cluster+="${controller_address}=http://${controller_address}:2380,"
58
+done
59
+initial_cluster=${initial_cluster::-1} # remove trailing comma
60
+
55 61
 service etcd stop
56 62
 rm -rf /var/lib/etcd/*
57 63
 awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
@@ -60,7 +66,7 @@ cat << EXEC_CMD >> /etc/init/etcd.conf
60 66
 exec /usr/bin/etcd -proxy on                                                         \\
61 67
                    -listen-client-urls http://127.0.0.1:4001                         \\
62 68
                    -advertise-client-urls http://127.0.0.1:7001                      \\
63
-                   -initial-cluster controller=http://${controller_node_address}:2380
69
+                   -initial-cluster ${initial_cluster}
64 70
 EXEC_CMD
65 71
 service etcd start
66 72
 
@@ -143,7 +149,7 @@ apt-get -y install calico-compute bird
143 149
 # script. You should consult the relevant documentation for your chosen BGP
144 150
 # stack.
145 151
 
146
-calico-gen-bird-conf.sh $this_node_address $controller_node_address 64511
152
+calico-gen-bird-mesh-conf.sh $this_node_address 64511 ${controller_node_addresses[@]}
147 153
 
148 154
 # Edit the /etc/calico/felix.cfg file:
149 155
 #     Change the MetadataAddr setting to 127.0.0.1.

+ 10
- 2
deployment_scripts/calico_controller.sh View File

@@ -10,6 +10,7 @@ set -x
10 10
 echo "Hi, I'm a controller node!"
11 11
 
12 12
 this_node_address=$(python get_node_ip.py `hostname`)
13
+controller_node_addresses=$(python get_node_ips_by_role.py controller)
13 14
 
14 15
 # Get APT key for binaries.projectcalico.org.
15 16
 
@@ -51,19 +52,26 @@ apt-get update
51 52
 
52 53
 apt-get -y install etcd
53 54
 
55
+for controller_address in ${controller_node_addresses[@]}
56
+do
57
+  initial_cluster+="${controller_address}=http://${controller_address}:2380,"
58
+done
59
+initial_cluster=${initial_cluster::-1} # remove trailing comma
60
+
54 61
 service etcd stop
55 62
 rm -rf /var/lib/etcd/*
56 63
 awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
57 64
 mv tmp /etc/init/etcd.conf
58 65
 cat << EXEC_CMD >> /etc/init/etcd.conf
59
-exec /usr/bin/etcd -name controller                                                                           \\
66
+exec /usr/bin/etcd -name ${this_node_address}                                                                 \\
60 67
                    -advertise-client-urls "http://${this_node_address}:2379,http://${this_node_address}:4001" \\
61 68
                    -listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001"                              \\
62 69
                    -listen-peer-urls "http://0.0.0.0:2380"                                                    \\
63 70
                    -initial-advertise-peer-urls "http://${this_node_address}:2380"                            \\
64 71
                    -initial-cluster-token fuel-cluster-1                                                      \\
65
-                   -initial-cluster controller=http://${this_node_address}:2380                               \\
72
+                   -initial-cluster ${initial_cluster}                                                        \\
66 73
                    -initial-cluster-state new
74
+
67 75
 EXEC_CMD
68 76
 
69 77
 service etcd start

+ 20
- 9
deployment_scripts/calico_route_reflector.sh View File

@@ -8,8 +8,10 @@ set -x
8 8
 echo "Hi, I'm a route_reflector node!"
9 9
 
10 10
 this_node_address=$(python get_node_ip.py `hostname`)
11
+controller_node_addresses=$(python get_node_ips_by_role.py controller)
11 12
 
12
-bgp_peers=$(python get_rr_peers.py)
13
+client_peers=$(python get_node_ips_by_role.py compute)
14
+route_reflector_peers=("${controller_node_addresses[@]/$this_node_address}")
13 15
 
14 16
 # Generate basic config for a BIRD BGP route reflector.
15 17
 cat > /etc/bird/bird.conf <<EOF
@@ -38,24 +40,33 @@ protocol device {
38 40
 }
39 41
 EOF
40 42
 
41
-# Add a BGP protocol stanza for each compute node.
42
-for node in $bgp_peers; do
43
-    if [ $node != $this_node_address ]; then
44
-        cat >> /etc/bird/bird.conf <<EOF
45
-
43
+# Add a BGP protocol stanza for all peers.
44
+for node in ${client_peers[@]} ${route_reflector_peers[@]}; do
45
+  cat >> /etc/bird/bird.conf <<EOF
46 46
 protocol bgp {
47
-  description "$node";
48 47
   local as 64511;
49 48
   neighbor $node as 64511;
50 49
   multihop;
50
+EOF
51
+
52
+  if [[ " ${client_peers[@]} " =~ " ${node} " ]]; then
53
+    cat >> /etc/bird/bird.conf <<EOF
54
+  description "Client $node";
51 55
   rr client;
56
+EOF
57
+  else
58
+    cat >> /etc/bird/bird.conf <<EOF
59
+  description "Route Reflector $node";
60
+EOF
61
+  fi
62
+
63
+  cat >> /etc/bird/bird.conf <<EOF
64
+  rr cluster id 1.2.3.4;
52 65
   import all;
53 66
   export all;
54 67
   source address ${this_node_address};
55 68
 }
56
-
57 69
 EOF
58
-    fi
59 70
 done
60 71
 
61 72
 # Restart BIRD with the new config.

+ 0
- 16
deployment_scripts/get_controller_ip.py View File

@@ -1,16 +0,0 @@
1
-#!/usr/bin/env python
2
-# Copyright 2015 Metaswitch Networks
3
-
4
-import yaml
5
-
6
-with open("/etc/compute.yaml", "r") as f:
7
-    config = yaml.safe_load(f)
8
-
9
-for node in config["nodes"]:
10
-    if node["role"] == "primary-controller":
11
-        controller_ip = node["internal_address"]
12
-        break
13
-else:
14
-    controller_ip = None
15
-
16
-print controller_ip

+ 32
- 0
deployment_scripts/get_node_ips_by_role.py View File

@@ -0,0 +1,32 @@
1
+#!/usr/bin/env python
2
+# Copyright 2015 Metaswitch Networks
3
+
4
+import argparse
5
+import yaml
6
+
7
+from pluginutils import NODES_CONFIG
8
+
9
+
10
+def main(node_roles):
11
+    with open(NODES_CONFIG, "r") as f:
12
+        config = yaml.safe_load(f)
13
+
14
+    node_ips = [node["internal_address"] for node in config["nodes"] 
15
+                if node["role"] in node_roles]
16
+
17
+    return node_ips
18
+
19
+
20
+if __name__ == "__main__":
21
+    parser = argparse.ArgumentParser()
22
+    parser.add_argument("node_role", choices=["compute", "controller"])
23
+    args = parser.parse_args()
24
+
25
+    args.node_role = [args.node_role]
26
+    if args.node_role == ["controller"]:
27
+        args.node_role.append("primary-controller")
28
+
29
+    node_ips = main(args.node_role)
30
+    if node_ips:
31
+        print " ".join(node_ips)
32
+

+ 0
- 22
deployment_scripts/get_rr_peers.py View File

@@ -1,22 +0,0 @@
1
-#!/usr/bin/env python
2
-# Copyright 2015 Metaswitch Networks
3
-
4
-import yaml
5
-
6
-from pluginutils import NODES_CONFIG
7
-
8
-def main():
9
-    with open(NODES_CONFIG, "r") as f:
10
-        config = yaml.safe_load(f)
11
-
12
-    # The route reflector should only peer with compute nodes.
13
-    peer_ips = [node["internal_address"] for node in config["nodes"] 
14
-                if node["role"] == "compute"]
15
-
16
-    return peer_ips
17
-
18
-if __name__ == "__main__":
19
-    peer_ips = main()
20
-    if peer_ips:
21
-        print " ".join(peer_ips)
22
-

+ 42
- 0
deployment_scripts/update_etcd_cluster.sh View File

@@ -0,0 +1,42 @@
1
+#!/bin/bash
2
+# Copyright 2015 Metaswitch Networks
3
+
4
+this_node_address=$(python get_node_ip.py `hostname`)
5
+controller_node_addresses=$(python get_node_ips_by_role.py controller)
6
+
7
+for node_address in ${controller_node_addresses[@]}
8
+do
9
+  initial_cluster+="${node_address}=http://${node_address}:2380,"
10
+done
11
+
12
+initial_cluster=${initial_cluster::-1} # remove trailing comma
13
+
14
+service etcd stop
15
+rm -rf /var/lib/etcd/*
16
+awk '/exec \/usr\/bin\/etcd/{while(getline && $0 != ""){}}1' /etc/init/etcd.conf > tmp
17
+mv tmp /etc/init/etcd.conf
18
+cat << EXEC_CMD >> /etc/init/etcd.conf
19
+exec /usr/bin/etcd -name ${this_node_address}                                                                 \\
20
+                   -advertise-client-urls "http://${this_node_address}:2379,http://${this_node_address}:4001" \\
21
+                   -listen-client-urls "http://0.0.0.0:2379,http://0.0.0.0:4001"                              \\
22
+                   -listen-peer-urls "http://0.0.0.0:2380"                                                    \\
23
+                   -initial-advertise-peer-urls "http://${this_node_address}:2380"                            \\
24
+                   -initial-cluster-token fuel-cluster-1                                                      \\
25
+                   -initial-cluster ${initial_cluster}                                                        \\
26
+                   -initial-cluster-state new
27
+
28
+EXEC_CMD
29
+service etcd start
30
+
31
+retry_count=0
32
+while [[ $retry_count -lt 5 ]]; do
33
+  etcdctl cluster-health
34
+  if [[ $? == 0 ]]; then
35
+    break
36
+  else
37
+    ((retry_count++))
38
+    service etcd restart
39
+    sleep 2
40
+  fi
41
+done
42
+

Loading…
Cancel
Save