Fix race condition issue for k8s multi masters
When creating a multi-master cluster, all master nodes will attempt to
create kubernetes resources in the cluster at the same time, like
coredns, the dashboard, calico etc. This race condition shouldn't be
a problem when doing declarative calls instead of imperative (kubectl
apply instead of create). However, due to [1], kubectl fails to apply
the changes and the deployment scripts fail, causing cluster creation
to fail in the case of Heat SoftwareDeployments. This patch passes the
ResourceGroup index of every master so that resource creation will be
attempted only from the first master node.
[1] https://github.com/kubernetes/kubernetes/issues/44165
Task: 21673
Story: 1775759
Change-Id: I83f78022481aeef945334c37ac6c812bba9791fd
(cherry picked from commit 3c72d7b88b)
This commit is contained in:
parent
629ee4e7a8
commit
587fa6e7e3
|
@ -447,6 +447,15 @@ subjects:
|
|||
EOF
|
||||
}
|
||||
|
||||
# NOTE(flwang): Let's keep the same addons yaml file on all masters,
|
||||
# but if it's not the primary/bootstrapping master, don't try to
|
||||
# create those resources to avoid race condition issue until the
|
||||
# kubectl issue https://github.com/kubernetes/kubernetes/issues/44165
|
||||
# is fixed.
|
||||
if [ "$MASTER_INDEX" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
until curl -sf "http://127.0.0.1:8080/healthz"
|
||||
do
|
||||
echo "Waiting for Kubernetes API..."
|
||||
|
|
|
@ -245,6 +245,15 @@ spec:
|
|||
EOF
|
||||
}
|
||||
|
||||
# NOTE(flwang): Let's keep the same addons yaml file on all masters,
|
||||
# but if it's not the primary/bootstrapping master, don't try to
|
||||
# create those resources to avoid race condition issue until the
|
||||
# kubectl issue https://github.com/kubernetes/kubernetes/issues/44165
|
||||
# is fixed.
|
||||
if [ "$MASTER_INDEX" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Waiting for Kubernetes API..."
|
||||
until curl --silent "http://127.0.0.1:8080/version"
|
||||
do
|
||||
|
|
|
@ -102,6 +102,15 @@ writeFile $INGRESS_TRAEFIK_MANIFEST "$INGRESS_TRAEFIK_MANIFEST_CONTENT"
|
|||
INGRESS_TRAEFIK_BIN="/srv/magnum/kubernetes/bin/ingress-traefik"
|
||||
INGRESS_TRAEFIK_SERVICE="/etc/systemd/system/ingress-traefik.service"
|
||||
|
||||
# NOTE(flwang): Let's keep the same addons yaml file on all masters,
|
||||
# but if it's not the primary/bootstrapping master, don't try to
|
||||
# create those resources to avoid race condition issue until the
|
||||
# kubectl issue https://github.com/kubernetes/kubernetes/issues/44165
|
||||
# is fixed.
|
||||
if [ "$MASTER_INDEX" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Binary for ingress traefik
|
||||
INGRESS_TRAEFIK_BIN_CONTENT='''#!/bin/sh
|
||||
until curl -sf "http://127.0.0.1:8080/healthz"
|
||||
|
|
|
@ -348,6 +348,15 @@ writeFile $grafanaService_file "$grafanaService_content"
|
|||
|
||||
. /etc/sysconfig/heat-params
|
||||
|
||||
# NOTE(flwang): Let's keep the same addons yaml file on all masters,
|
||||
# but if it's not the primary/bootstrapping master, don't try to
|
||||
# create those resources to avoid race condition issue until the
|
||||
# kubectl issue https://github.com/kubernetes/kubernetes/issues/44165
|
||||
# is fixed.
|
||||
if [ "$MASTER_INDEX" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "false" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
|
|
@ -2,6 +2,15 @@
|
|||
|
||||
. /etc/sysconfig/heat-params
|
||||
|
||||
# NOTE(flwang): Let's keep the same addons yaml file on all masters,
|
||||
# but if it's not the primary/bootstrapping master, don't try to
|
||||
# create those resources to avoid race condition issue until the
|
||||
# kubectl issue https://github.com/kubernetes/kubernetes/issues/44165
|
||||
# is fixed.
|
||||
if [ "$MASTER_INDEX" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Waiting for Kubernetes API..."
|
||||
until curl --silent "http://127.0.0.1:8080/version"
|
||||
do
|
||||
|
|
|
@ -448,6 +448,16 @@ subjects:
|
|||
EOF
|
||||
}
|
||||
|
||||
# NOTE(flwang): Let's keep the same addons yaml file on all masters,
|
||||
# but if it's not the primary/bootstrapping master, don't try to
|
||||
# create those resources to avoid race condition issue until the
|
||||
# kubectl issue https://github.com/kubernetes/kubernetes/issues/44165
|
||||
# is fixed.
|
||||
|
||||
if [ "$MASTER_INDEX" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Waiting for Kubernetes API..."
|
||||
until curl --silent "http://127.0.0.1:8080/version"
|
||||
do
|
||||
|
|
|
@ -5,6 +5,7 @@ write_files:
|
|||
owner: "root:root"
|
||||
permissions: "0600"
|
||||
content: |
|
||||
MASTER_INDEX="$MASTER_INDEX"
|
||||
PROMETHEUS_MONITORING="$PROMETHEUS_MONITORING"
|
||||
KUBE_API_PUBLIC_ADDRESS="$KUBE_API_PUBLIC_ADDRESS"
|
||||
KUBE_API_PRIVATE_ADDRESS="$KUBE_API_PRIVATE_ADDRESS"
|
||||
|
|
|
@ -610,6 +610,7 @@ resources:
|
|||
list_join:
|
||||
- '-'
|
||||
- [{ get_param: 'OS::stack_name' }, 'master', '%index%']
|
||||
master_index: '%index%'
|
||||
prometheus_monitoring: {get_param: prometheus_monitoring}
|
||||
grafana_admin_passwd: {get_param: grafana_admin_passwd}
|
||||
api_public_address: {get_attr: [api_lb, floating_address]}
|
||||
|
|
|
@ -356,6 +356,12 @@ parameters:
|
|||
whether or not to use Octavia for LoadBalancer type service.
|
||||
default: False
|
||||
|
||||
master_index:
|
||||
type: string
|
||||
description: >
|
||||
the index of master node, index 0 means the master node is the primary,
|
||||
bootstrapping node.
|
||||
|
||||
resources:
|
||||
|
||||
master_wait_handle:
|
||||
|
@ -396,6 +402,7 @@ resources:
|
|||
str_replace:
|
||||
template: {get_file: ../../common/templates/kubernetes/fragments/write-heat-params-master.yaml}
|
||||
params:
|
||||
"$MASTER_INDEX": {get_param: master_index}
|
||||
"$PROMETHEUS_MONITORING": {get_param: prometheus_monitoring}
|
||||
"$KUBE_API_PUBLIC_ADDRESS": {get_attr: [api_address_switch, public_ip]}
|
||||
"$KUBE_API_PRIVATE_ADDRESS": {get_attr: [api_address_switch, private_ip]}
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
---
|
||||
fixes:
|
||||
- |
|
||||
When creating a multi-master cluster, all master nodes will attempt to
|
||||
create kubernetes resources in the cluster at the same time, like
|
||||
coredns, the dashboard, calico etc. This race condition shouldn't be
|
||||
a problem when doing declarative calls instead of imperative (kubectl
|
||||
apply instead of create). However, due to [1], kubectl fails to apply
|
||||
the changes and the deployment scripts fail, causing cluster creation
|
||||
to fail in the case of Heat SoftwareDeployments. This patch passes the
|
||||
ResourceGroup index of every master so that resource creation will be
|
||||
attempted only from the first master node.
|
||||
[1] https://github.com/kubernetes/kubernetes/issues/44165
|
Loading…
Reference in New Issue