Browse Source

Merge "Add a second auxiliary etcd server for bootstrap"

Scott Hussey 11 months ago
parent
commit
b9670d70a2

+ 11
- 0
charts/apiserver/templates/daemonset.yaml View File

@@ -62,6 +62,17 @@ spec:
62 62
               exec:
63 63
                 command:
64 64
                   - /tmp/bin/pre_stop
65
+
66
+          readinessProbe:
67
+            httpGet:
68
+              host: 127.0.0.1
69
+              path: /healthz
70
+              port: {{ .Values.network.kubernetes_apiserver.port }}
71
+              scheme: HTTPS
72
+            initialDelaySeconds: 10
73
+            periodSeconds: 5
74
+            timeoutSeconds: 5
75
+
65 76
           volumeMounts:
66 77
             - name: {{ .Values.service.name }}-certs
67 78
               mountPath: /certs

+ 24
- 0
charts/apiserver/templates/etc/_kubernetes-apiserver.yaml.tpl View File

@@ -33,6 +33,7 @@ spec:
33 33
           valueFrom:
34 34
             fieldRef:
35 35
               fieldPath: status.podIP
36
+
36 37
       command:
37 38
         {{- range .Values.command_prefix }}
38 39
         - {{ . }}
@@ -58,6 +59,29 @@ spec:
58 59
 
59 60
       ports:
60 61
         - containerPort: {{ .Values.network.kubernetes_apiserver.port }}
62
+
63
+      readinessProbe:
64
+        httpGet:
65
+          host: 127.0.0.1
66
+          path: /healthz
67
+          port: {{ .Values.network.kubernetes_apiserver.port }}
68
+          scheme: HTTPS
69
+        initialDelaySeconds: 10
70
+        periodSeconds: 5
71
+        timeoutSeconds: 5
72
+
73
+      livenessProbe:
74
+        failureThreshold: 2
75
+        httpGet:
76
+          host: 127.0.0.1
77
+          path: /healthz
78
+          port: {{ .Values.network.kubernetes_apiserver.port }}
79
+          scheme: HTTPS
80
+        initialDelaySeconds: 15
81
+        periodSeconds: 10
82
+        successThreshold: 1
83
+        timeoutSeconds: 10
84
+
61 85
       volumeMounts:
62 86
         - name: etc
63 87
           mountPath: /etc/kubernetes/apiserver

+ 10
- 0
charts/controller_manager/templates/daemonset.yaml View File

@@ -61,6 +61,16 @@ spec:
61 61
               exec:
62 62
                 command:
63 63
                   - /tmp/bin/pre_stop
64
+
65
+          readinessProbe:
66
+            httpGet:
67
+              host: 127.0.0.1
68
+              path: /healthz
69
+              port: {{ .Values.network.kubernetes_controller_manager.port }}
70
+            initialDelaySeconds: 10
71
+            periodSeconds: 5
72
+            timeoutSeconds: 5
73
+
64 74
           volumeMounts:
65 75
             - name: {{ .Values.service.name }}-bin
66 76
               mountPath: /tmp/bin

+ 22
- 0
charts/controller_manager/templates/etc/_kubernetes-controller-manager.yaml.tpl View File

@@ -39,6 +39,8 @@ spec:
39 39
         {{- range .Values.command_prefix }}
40 40
         - {{ . }}
41 41
         {{- end }}
42
+        - --address=127.0.0.1
43
+        - --port={{ .Values.network.kubernetes_controller_manager.port }}
42 44
         - --configure-cloud-routes=false
43 45
         - --leader-elect=true
44 46
         - --kubeconfig=/etc/kubernetes/controller-manager/kubeconfig.yaml
@@ -47,6 +49,26 @@ spec:
47 49
         - --use-service-account-credentials=true
48 50
         - --v=5
49 51
 
52
+      readinessProbe:
53
+        httpGet:
54
+          host: 127.0.0.1
55
+          path: /healthz
56
+          port: {{ .Values.network.kubernetes_controller_manager.port }}
57
+        initialDelaySeconds: 10
58
+        periodSeconds: 5
59
+        timeoutSeconds: 5
60
+
61
+      livenessProbe:
62
+        failureThreshold: 2
63
+        httpGet:
64
+          host: 127.0.0.1
65
+          path: /healthz
66
+          port: {{ .Values.network.kubernetes_controller_manager.port }}
67
+        initialDelaySeconds: 15
68
+        periodSeconds: 10
69
+        successThreshold: 1
70
+        timeoutSeconds: 10
71
+
50 72
       volumeMounts:
51 73
         - name: etc
52 74
           mountPath: /etc/kubernetes/controller-manager

+ 2
- 0
charts/controller_manager/values.yaml View File

@@ -68,6 +68,8 @@ network:
68 68
   kubernetes_netloc: 10.96.0.1
69 69
   pod_cidr: 10.97.0.0/16
70 70
   service_cidr: 10.96.0.0/16
71
+  kubernetes_controller_manager:
72
+    port: 10252
71 73
 
72 74
 service:
73 75
   name: kubernetes-controller-manager

+ 5
- 36
charts/etcd/templates/tests/test-etcd-health.yaml View File

@@ -24,18 +24,12 @@ metadata:
24 24
   annotations:
25 25
     "helm.sh/hook": "test-success"
26 26
 spec:
27
+  nodeSelector:
28
+    {{ .Values.labels.anchor.node_selector_key }}: {{ .Values.labels.anchor.node_selector_value }}
27 29
   restartPolicy: Never
28 30
   containers:
29 31
     - name: "{{ .Release.Name }}-etcd-test"
30 32
       env:
31
-        - name: ETCD_NAME
32
-          valueFrom:
33
-            fieldRef:
34
-              fieldPath: spec.nodeName
35
-        - name: POD_IP
36
-          valueFrom:
37
-            fieldRef:
38
-              fieldPath: status.podIP
39 33
         - name: ETCDCTL_API
40 34
           value: '3'
41 35
         - name: ETCDCTL_DIAL_TIMEOUT
@@ -48,37 +42,19 @@ spec:
48 42
           value: /etc/etcd/tls/certs/anchor-etcd-client.pem
49 43
         - name: ETCDCTL_KEY
50 44
           value: /etc/etcd/tls/keys/anchor-etcd-client-key.pem
51
-        - name: CLIENT_ENDPOINT
52
-          value: https://$(POD_IP):{{ .Values.network.service_client.target_port }}
53
-        - name: PEER_ENDPOINT
54
-          value: https://$(POD_IP):{{ .Values.network.service_peer.target_port }}
55 45
       image: {{ .Values.images.tags.etcdctl }}
56 46
       imagePullPolicy: {{ .Values.images.pull_policy }}
57 47
 {{ tuple . .Values.pod.resources.test | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }}
58 48
       command:
59
-        - /bin/sh
60
-        - -c
61
-        - |
62
-          HEALTH=$(etcdctl endpoint health)
63
-          CODE=$?
64
-          echo $HEALTH
65
-          exit $CODE
49
+        - etcdctl
50
+        - endpoint
51
+        - health
66 52
       volumeMounts:
67
-        - name: {{ .Values.service.name }}-bin
68
-          mountPath: /tmp
69 53
         - name: {{ .Values.service.name }}-certs
70 54
           mountPath: /etc/etcd/tls/certs
71
-        - name: etcd-etc
72
-          mountPath: /etcd-etc
73 55
         - name: {{ .Values.service.name }}-keys
74 56
           mountPath: /etc/etcd/tls/keys
75
-        - name: {{ .Values.service.name }}-etc
76
-          mountPath: /anchor-etcd
77 57
   volumes:
78
-    - name: {{ .Values.service.name }}-bin
79
-      configMap:
80
-        name: {{ .Values.service.name }}-bin
81
-        defaultMode: 0555
82 58
     - name: {{ .Values.service.name }}-certs
83 59
       configMap:
84 60
         name: {{ .Values.service.name }}-certs
@@ -87,12 +63,5 @@ spec:
87 63
       secret:
88 64
         secretName: {{ .Values.service.name }}-keys
89 65
         defaultMode: 0444
90
-    - name: etcd-etc
91
-      hostPath:
92
-        path: {{ .Values.etcd.host_etc_path }}
93
-    - name: {{ .Values.service.name }}-etc
94
-      configMap:
95
-        name: {{ .Values.service.name }}-etc
96
-        defaultMode: 0444
97 66
 ...
98 67
 {{- end }}

+ 22
- 0
charts/scheduler/templates/etc/_kubernetes-scheduler.yaml.tpl View File

@@ -39,9 +39,31 @@ spec:
39 39
         {{- range .Values.command_prefix }}
40 40
         - {{ . }}
41 41
         {{- end }}
42
+        - --address=127.0.0.1
43
+        - --port={{ .Values.network.kubernetes_scheduler.port }}
42 44
         - --leader-elect=true
43 45
         - --kubeconfig=/etc/kubernetes/scheduler/kubeconfig.yaml
44 46
 
47
+      readinessProbe:
48
+        httpGet:
49
+          host: 127.0.0.1
50
+          path: /healthz
51
+          port: {{ .Values.network.kubernetes_scheduler.port }}
52
+        initialDelaySeconds: 10
53
+        periodSeconds: 5
54
+        timeoutSeconds: 5
55
+
56
+      livenessProbe:
57
+        failureThreshold: 2
58
+        httpGet:
59
+          host: 127.0.0.1
60
+          path: /healthz
61
+          port: {{ .Values.network.kubernetes_scheduler.port }}
62
+        initialDelaySeconds: 15
63
+        periodSeconds: 10
64
+        successThreshold: 1
65
+        timeoutSeconds: 15
66
+
45 67
       volumeMounts:
46 68
         - name: etc
47 69
           mountPath: /etc/kubernetes/scheduler

+ 10
- 0
charts/scheduler/templates/sched-anchor.yaml View File

@@ -53,6 +53,16 @@ spec:
53 53
               exec:
54 54
                 command:
55 55
                   - /tmp/bin/pre_stop
56
+
57
+          readinessProbe:
58
+            httpGet:
59
+              host: 127.0.0.1
60
+              path: /healthz
61
+              port: {{ .Values.network.kubernetes_scheduler.port }}
62
+            initialDelaySeconds: 10
63
+            periodSeconds: 5
64
+            timeoutSeconds: 5
65
+
56 66
           volumeMounts:
57 67
             - name: bin
58 68
               mountPath: /tmp/bin

+ 2
- 0
charts/scheduler/values.yaml View File

@@ -69,6 +69,8 @@ images:
69 69
 
70 70
 network:
71 71
   kubernetes_netloc: 10.96.0.1
72
+  kubernetes_scheduler:
73
+    port: 10251
72 74
 
73 75
 service:
74 76
   name: kubernetes-scheduler

+ 9
- 0
examples/basic/armada-resources.yaml View File

@@ -68,6 +68,7 @@ metadata:
68 68
   storagePolicy: cleartext
69 69
 data:
70 70
   description: Kubernetes components
71
+  sequenced: true
71 72
   chart_group:
72 73
     - haproxy
73 74
     - kubernetes-etcd
@@ -86,6 +87,7 @@ metadata:
86 87
   storagePolicy: cleartext
87 88
 data:
88 89
   description: UCP platform components
90
+  sequenced: true
89 91
   chart_group:
90 92
     - promenade
91 93
 ---
@@ -585,6 +587,10 @@ data:
585 587
             kubernetes-apiserver:
586 588
               server_opts: "check port 6443"
587 589
               conf_parts:
590
+                global:
591
+                  - timeout connect 5000ms
592
+                  - timeout client 30s
593
+                  - timeout server 30s
588 594
                 frontend:
589 595
                   - mode tcp
590 596
                   - bind *:6553
@@ -1107,6 +1113,9 @@ data:
1107 1113
     no_hooks: false
1108 1114
   upgrade:
1109 1115
     no_hooks: false
1116
+  timeout: 600
1117
+  wait:
1118
+    timeout: 600
1110 1119
   values:
1111 1120
     images:
1112 1121
       tags:

+ 1
- 0
examples/complete/armada-resources.yaml View File

@@ -373,6 +373,7 @@ data:
373 373
   timeout: 600
374 374
   wait:
375 375
     timeout: 600
376
+  test: true
376 377
   upgrade:
377 378
     no_hooks: true
378 379
   values:

+ 3
- 3
promenade/templates/include/genesis-etcd/common-volumes.yaml View File

@@ -1,6 +1,6 @@
1
-    - name: data
1
+    - name: data-{{ etcd_name }}
2 2
       hostPath:
3
-        path: /var/lib/etcd/{{ etcd_name }}
4
-    - name: pki
3
+        path: /var/lib/etcd/{{ volume_name }}
4
+    - name: pki-{{ etcd_name }}
5 5
       hostPath:
6 6
         path: /etc/genesis/etcd/pki

+ 4
- 4
promenade/templates/include/genesis-etcd/server-container.yaml View File

@@ -1,4 +1,4 @@
1
-    - name: etcd
1
+    - name: etcd-{{ etcd_name }}
2 2
       image: {{ config['Genesis:images.kubernetes.etcd'] }}
3 3
       env:
4 4
         - name: ETCD_NAME
@@ -38,7 +38,7 @@
38 38
         - name: ETCD_INITIAL_CLUSTER_STATE
39 39
           value: new
40 40
         - name: ETCD_INITIAL_CLUSTER
41
-          value: genesis=https://{{ config['Genesis:ip'] }}:2380,auxiliary=https://{{ config['Genesis:ip'] }}:12380
41
+          value: {{ config['Genesis:hostname'] }}=https://{{ config['Genesis:ip'] }}:2380,auxiliary-0=https://{{ config['Genesis:ip'] }}:12380,auxiliary-1=https://{{ config['Genesis:ip'] }}:22380
42 42
         - name: ETCDCTL_API
43 43
           value: '3'
44 44
         - name: ETCDCTL_DIAL_TIMEOUT
@@ -57,7 +57,7 @@
57 57
         - name: peer
58 58
           containerPort: {{ peer_port }}
59 59
       volumeMounts:
60
-        - name: data
60
+        - name: data-{{ etcd_name }}
61 61
           mountPath: /var/lib/etcd
62
-        - name: pki
62
+        - name: pki-{{ etcd_name }}
63 63
           mountPath: /etc/etcd/pki

+ 49
- 16
promenade/templates/roles/genesis/etc/kubernetes/manifests/auxiliary-kubernetes-etcd.yaml View File

@@ -9,9 +9,15 @@ metadata:
9 9
     promenade: genesis
10 10
 spec:
11 11
   hostNetwork: true
12
-{%- with etcd_name = 'auxiliary', client_port = 12379, peer_port = 12380  %}
13 12
   containers:
13
+{%- with etcd_name = 'auxiliary-0', client_port = 12379, peer_port = 12380  %}
14 14
 {% include "genesis-etcd/server-container.yaml" with context %}
15
+{%- endwith %}
16
+
17
+{%- with etcd_name = 'auxiliary-1', client_port = 22379, peer_port = 22380  %}
18
+{% include "genesis-etcd/server-container.yaml" with context %}
19
+{%- endwith %}
20
+
15 21
     - name: monitor
16 22
       image: {{ config['Genesis:images.kubernetes.etcd'] }}
17 23
       command:
@@ -19,21 +25,41 @@ spec:
19 25
         - -c
20 26
         - |-
21 27
           set -x
22
-          MEMBER_COUNT=$(etcdctl member list | grep '\bstarted\b' | wc -l)
23
-          if [ $MEMBER_COUNT -gt 1 ]; then
24
-              MEMBER_ID=$(etcdctl member list | grep auxiliary | awk -F ', ' '{ print $1 }')
25
-              if [ -n $MEMBER_ID ]; then
26
-                  while [ $MEMBER_COUNT -lt 3 ]; do
27
-                      sleep 30
28
-                      MEMBER_COUNT=$(etcdctl member list | grep '\bstarted\b' | wc -l)
29
-                  done
30
-                  set -e
28
+
29
+          function external_member_count() {
30
+              etcdctl member list \
31
+                  | grep '\bstarted\b' \
32
+                  | grep -Ev "\\b({{ config['Genesis:hostname'] }}|auxiliary-0|auxiliary-1)\\b" \
33
+                  | wc -l
34
+          }
35
+
36
+          function remove_if_possible() {
37
+              MEMBER_NAME=$1
38
+              MEMBER_ID=$(etcdctl member list | grep "${MEMBER_NAME}" | awk -F ', ' '{ print $1 }')
39
+              if [ -n "${MEMBER_ID}" ]; then
31 40
                   etcdctl member remove $MEMBER_ID
32 41
               fi
42
+          }
43
+
44
+          # NOTE(mark-burnett): If there are any non-genesis members, then we are ready to
45
+          # remove the auxiliary members.  Otherwise, wait.
46
+          while [ ! "$(external_member_count)" -gt 0 ]; do
47
+              sleep 10
48
+          done
33 49
 
34
-              rm -rf /var/lib/etcd/* /manifests/auxiliary-kubernetes-etcd.yaml
35
-              sleep 10000
36
-          fi
50
+          # NOTE(mark-burnett): Failures beyond this point are unexpected, but
51
+          # should be recovered by restarting this container.
52
+          set -e
53
+
54
+          remove_if_possible auxiliary-0
55
+          remove_if_possible auxiliary-1
56
+
57
+          rm -rf \
58
+              /var/lib/etcd/auxiliary-0 \
59
+              /var/lib/etcd/auxiliary-1 \
60
+              /manifests/auxiliary-kubernetes-etcd.yaml
61
+
62
+          sleep 10000
37 63
       env:
38 64
         - name: ETCDCTL_API
39 65
           value: '3'
@@ -48,16 +74,23 @@ spec:
48 74
         - name: ETCDCTL_KEY
49 75
           value: /etc/etcd/pki/etcd-client-key.pem
50 76
       volumeMounts:
51
-        - name: data
77
+        - name: all-etcd-data
52 78
           mountPath: /var/lib/etcd
53
-        - name: pki
79
+        - name: pki-auxiliary-0
54 80
           mountPath: /etc/etcd/pki
55 81
         - name: manifest
56 82
           mountPath: /manifests
57 83
   volumes:
84
+{%- with etcd_name = 'auxiliary-0', client_port = 12379, peer_port = 12380, volume_name = 'auxiliary-0' %}
85
+{% include "genesis-etcd/common-volumes.yaml" with context %}
86
+{%- endwith %}
87
+{%- with etcd_name = 'auxiliary-1', client_port = 22379, peer_port = 22380, volume_name = 'auxiliary-1' %}
58 88
 {% include "genesis-etcd/common-volumes.yaml" with context %}
89
+{%- endwith %}
59 90
     - name: manifest
60 91
       hostPath:
61 92
         path: /etc/kubernetes/manifests
62
-{%- endwith %}
93
+    - name: all-etcd-data
94
+      hostPath:
95
+        path: /var/lib/etcd
63 96
 ...

+ 2
- 2
promenade/templates/roles/genesis/etc/kubernetes/manifests/bootstrap-armada.yaml View File

@@ -59,7 +59,7 @@ spec:
59 59
 
60 60
         while true; do
61 61
             sleep 10
62
-            if armada --debug \
62
+            if armada \
63 63
                     apply \
64 64
                     --target-manifest {{ config.get_path('Genesis:armada.target_manifest', 'cluster-bootstrap') }} \
65 65
                     --tiller-host 127.0.0.1 \
@@ -134,7 +134,7 @@ spec:
134 134
       - --bind-address=0.0.0.0
135 135
       - --runtime-config=batch/v2alpha1=true
136 136
       - --allow-privileged=true
137
-      - --etcd-servers=https://localhost:2379
137
+      - --etcd-servers=https://localhost:12379
138 138
       - --etcd-cafile=/etc/kubernetes/apiserver/pki/etcd-client-ca.pem
139 139
       - --etcd-certfile=/etc/kubernetes/apiserver/pki/etcd-client.pem
140 140
       - --etcd-keyfile=/etc/kubernetes/apiserver/pki/etcd-client-key.pem

+ 1
- 1
promenade/templates/roles/genesis/etc/kubernetes/manifests/kubernetes-etcd.yaml View File

@@ -9,7 +9,7 @@ metadata:
9 9
     kubernetes-etcd-service: enabled
10 10
 spec:
11 11
   hostNetwork: true
12
-{%- with etcd_name = 'genesis', client_port = 2379, peer_port = 2380  %}
12
+{%- with etcd_name = config['Genesis:hostname'], client_port = 2379, peer_port = 2380, volume_name = 'kubernetes' %}
13 13
   containers:
14 14
 {% include "genesis-etcd/server-container.yaml" with context %}
15 15
   volumes:

+ 0
- 2
tools/g2/lib/virsh.sh View File

@@ -134,8 +134,6 @@ vm_create() {
134 134
         --memory "$(config_vm_memory)" \
135 135
         --import \
136 136
         --disk "vol=${VIRSH_POOL}/promenade-${NAME}.img,${DISK_OPTS}" \
137
-        --disk "pool=${VIRSH_POOL},size=20,${DISK_OPTS}" \
138
-        --disk "pool=${VIRSH_POOL},size=20,${DISK_OPTS}" \
139 137
         --disk "vol=${VIRSH_POOL}/cloud-init-${NAME}.iso,device=cdrom" &>> "${LOG_FILE}"
140 138
 
141 139
     ssh_wait "${NAME}"

+ 6
- 0
tools/gate/config-templates/bootstrap-armada-config.yaml View File

@@ -324,6 +324,9 @@ data:
324 324
   release: calico-etcd
325 325
   namespace: kube-system
326 326
   timeout: 600
327
+  wait:
328
+    timeout: 6000
329
+  test: true
327 330
   upgrade:
328 331
     no_hooks: true
329 332
   values:
@@ -953,6 +956,9 @@ data:
953 956
   release: kubernetes-etcd
954 957
   namespace: kube-system
955 958
   timeout: 600
959
+  wait:
960
+    timeout: 6000
961
+  test: true
956 962
   upgrade:
957 963
     no_hooks: true
958 964
   values:

Loading…
Cancel
Save