Merge "Support auto_healing_controller"
This commit is contained in:
commit
f1cf3d0b38
@ -403,6 +403,10 @@ the table are linked to more details elsewhere in the user guide.
|
||||
| `auto_healing_enabled`_ | - true | false |
|
||||
| | - false | |
|
||||
+---------------------------------------+--------------------+---------------+
|
||||
| `auto_healing_controller`_ | see below | "draino" |
|
||||
+---------------------------------------+--------------------+---------------+
|
||||
| `magnum_auto_healer_tag`_ | see below | see below |
|
||||
+---------------------------------------+--------------------+---------------+
|
||||
| `auto_scaling_enabled`_ | - true | false |
|
||||
| | - false | |
|
||||
+---------------------------------------+--------------------+---------------+
|
||||
@ -1307,6 +1311,14 @@ _`master_lb_floating_ip_enabled`
|
||||
_`auto_healing_enabled`
|
||||
If set to true, the auto healing feature will be enabled. Defaults to false.
|
||||
|
||||
_`auto_healing_controller`
|
||||
This label sets the auto-healing service to be used. Currently ``draino`` and
|
||||
``magnum-auto-healer`` are supported. The default is ``draino``. For more
|
||||
details, see
|
||||
`draino doc <https://github.com/planetlabs/draino>`_ and
|
||||
`magnum-auto-healer doc
|
||||
<https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/using-magnum-auto-healer.md>`_.
|
||||
|
||||
_`auto_scaling_enabled`
|
||||
If set to true, the auto scaling feature will be enabled. Defaults to false.
|
||||
|
||||
@ -1317,6 +1329,10 @@ _`node_problem_detector_tag`
|
||||
_`draino_tag`
|
||||
This label allows users to select a specific Draino version.
|
||||
|
||||
_`magnum_auto_healer_tag`
|
||||
This label allows users to select a specific magnum-auto-healer version.
|
||||
The default value for Train: v1.15.0
|
||||
|
||||
_`autoscaler_tag`
|
||||
This label allows users to select a specific Cluster Autoscaler version.
|
||||
|
||||
|
@ -160,7 +160,9 @@ KUBELET_ARGS="${KUBELET_ARGS} --client-ca-file=${CERT_DIR}/ca.crt --tls-cert-fil
|
||||
# specified cgroup driver
|
||||
KUBELET_ARGS="${KUBELET_ARGS} --cgroup-driver=${CGROUP_DRIVER}"
|
||||
|
||||
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then
|
||||
auto_healing_enabled=$(echo ${AUTO_HEALING_ENABLED} | tr '[:upper:]' '[:lower:]')
|
||||
autohealing_controller=$(echo ${AUTO_HEALING_CONTROLLER} | tr '[:upper:]' '[:lower:]')
|
||||
if [[ "${auto_healing_enabled}" = "true" && "${autohealing_controller}" = "draino" ]]; then
|
||||
KUBELET_ARGS="${KUBELET_ARGS} --node-labels=draino-enabled=true"
|
||||
fi
|
||||
|
||||
|
@ -8,7 +8,7 @@ printf "Starting to run ${step}\n"
|
||||
_gcr_prefix=${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}
|
||||
|
||||
# Deploy the Node Problem Detector (NPD) if either auto healing or NPD is enabled
|
||||
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" || "$(echo $NPD_ENABLED | tr '[:upper:]' '[:lower:]')" = "true"]; then
|
||||
if [[ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" || "$(echo $NPD_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]]; then
|
||||
# Generate Node Problem Detector manifest file
|
||||
NPD_DEPLOY=/srv/magnum/kubernetes/manifests/npd.yaml
|
||||
|
||||
@ -121,18 +121,15 @@ EOF
|
||||
fi
|
||||
|
||||
|
||||
_docker_draino_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/planetlabs/}
|
||||
step="enable-auto-healing"
|
||||
printf "Starting to run ${step}\n"
|
||||
function enable_draino {
|
||||
echo "Installing draino"
|
||||
_docker_draino_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/planetlabs/}
|
||||
draino_manifest=/srv/magnum/kubernetes/manifests/draino.yaml
|
||||
|
||||
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then
|
||||
# Generate Draino manifest file
|
||||
DRAINO_DEPLOY=/srv/magnum/kubernetes/manifests/draino.yaml
|
||||
|
||||
[ -f ${DRAINO_DEPLOY} ] || {
|
||||
echo "Writing File: $DRAINO_DEPLOY"
|
||||
mkdir -p $(dirname ${DRAINO_DEPLOY})
|
||||
cat << EOF > ${DRAINO_DEPLOY}
|
||||
[ -f ${draino_manifest} ] || {
|
||||
echo "Writing File: $draino_manifest"
|
||||
mkdir -p $(dirname ${draino_manifest})
|
||||
cat << EOF > ${draino_manifest}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
@ -222,7 +219,156 @@ spec:
|
||||
EOF
|
||||
}
|
||||
|
||||
kubectl apply -f ${DRAINO_DEPLOY}
|
||||
kubectl apply -f ${draino_manifest}
|
||||
}
|
||||
|
||||
function enable_magnum_auto_healer {
|
||||
echo "Installing magnum_auto_healer"
|
||||
image_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/k8scloudprovider/}
|
||||
image_prefix=${image_prefix%/}
|
||||
magnum_auto_healer_manifest=/srv/magnum/kubernetes/manifests/magnum_auto_healer.yaml
|
||||
|
||||
[ -f ${magnum_auto_healer_manifest} ] || {
|
||||
echo "Writing File: ${magnum_auto_healer_manifest}"
|
||||
mkdir -p $(dirname ${magnum_auto_healer_manifest})
|
||||
cat << EOF > ${magnum_auto_healer_manifest}
|
||||
---
|
||||
kind: ServiceAccount
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: magnum-auto-healer
|
||||
namespace: kube-system
|
||||
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: magnum-auto-healer
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: cluster-admin
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: magnum-auto-healer
|
||||
namespace: kube-system
|
||||
|
||||
---
|
||||
kind: ConfigMap
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: magnum-auto-healer-config
|
||||
namespace: kube-system
|
||||
data:
|
||||
config.yaml: |
|
||||
cluster-name: ${CLUSTER_UUID}
|
||||
dry-run: false
|
||||
monitor-interval: 30s
|
||||
check-delay-after-add: 20m
|
||||
leader-elect: true
|
||||
healthcheck:
|
||||
master:
|
||||
- type: Endpoint
|
||||
params:
|
||||
unhealthy-duration: 3m
|
||||
protocol: HTTPS
|
||||
port: 6443
|
||||
endpoints: ["/healthz"]
|
||||
ok-codes: [200]
|
||||
- type: NodeCondition
|
||||
params:
|
||||
unhealthy-duration: 3m
|
||||
types: ["Ready"]
|
||||
ok-values: ["True"]
|
||||
worker:
|
||||
- type: NodeCondition
|
||||
params:
|
||||
unhealthy-duration: 3m
|
||||
types: ["Ready"]
|
||||
ok-values: ["True"]
|
||||
openstack:
|
||||
auth-url: ${AUTH_URL}
|
||||
user-id: ${TRUSTEE_USER_ID}
|
||||
password: ${TRUSTEE_PASSWORD}
|
||||
trust-id: ${TRUST_ID}
|
||||
region: ${REGION_NAME}
|
||||
ca-file: /etc/kubernetes/ca-bundle.crt
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: magnum-auto-healer
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: magnum-auto-healer
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: magnum-auto-healer
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: magnum-auto-healer
|
||||
spec:
|
||||
hostNetwork: true
|
||||
serviceAccountName: magnum-auto-healer
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
- key: CriticalAddonsOnly
|
||||
operator: Exists
|
||||
- effect: NoExecute
|
||||
operator: Exists
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/master: ""
|
||||
containers:
|
||||
- name: magnum-auto-healer
|
||||
image: ${image_prefix}/magnum-auto-healer:${MAGNUM_AUTO_HEALER_TAG}
|
||||
imagePullPolicy: Always
|
||||
args:
|
||||
- /bin/magnum-auto-healer
|
||||
- --config=/etc/magnum-auto-healer/config.yaml
|
||||
- --v
|
||||
- "2"
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/magnum-auto-healer
|
||||
- name: kubernetes-config
|
||||
mountPath: /etc/kubernetes
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: magnum-auto-healer-config
|
||||
- name: kubernetes-config
|
||||
hostPath:
|
||||
path: /etc/kubernetes
|
||||
EOF
|
||||
}
|
||||
|
||||
kubectl apply -f ${magnum_auto_healer_manifest}
|
||||
}
|
||||
|
||||
step="enable-auto-healing"
|
||||
printf "Starting to run ${step}\n"
|
||||
|
||||
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then
|
||||
autohealing_controller=$(echo ${AUTO_HEALING_CONTROLLER} | tr '[:upper:]' '[:lower:]')
|
||||
case "${autohealing_controller}" in
|
||||
"")
|
||||
echo "No autohealing controller configured."
|
||||
;;
|
||||
"draino")
|
||||
enable_draino
|
||||
;;
|
||||
"magnum-auto-healer")
|
||||
enable_magnum_auto_healer
|
||||
;;
|
||||
*)
|
||||
echo "Autohealing controller ${autohealing_controller} not supported."
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
printf "Finished running ${step}\n"
|
||||
|
@ -7,8 +7,11 @@ printf "Starting to run ${step}\n"
|
||||
|
||||
_docker_ca_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}
|
||||
|
||||
# Deploy the Cluster Autoscaler (CA) if auto scaling is enabled, or if auto healing is enabled with the draino controller
|
||||
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" || "$(echo $AUTO_SCALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true"]; then
|
||||
auto_scaling_enabled=$(echo $AUTO_SCALING_ENABLED | tr '[:upper:]' '[:lower:]')
|
||||
auto_healing_enabled=$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')
|
||||
autohealing_controller=$(echo ${AUTO_HEALING_CONTROLLER} | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
if [[ "${auto_scaling_enabled}" = "true" || ("${auto_healing_enabled}" = "true" && "${autohealing_controller}" = "draino") ]]; then
|
||||
# Generate Autoscaler manifest file
|
||||
AUTOSCALER_DEPLOY=/srv/magnum/kubernetes/manifests/autoscaler.yaml
|
||||
|
||||
|
@ -97,8 +97,10 @@ HEAT_PARAMS=/etc/sysconfig/heat-params
|
||||
NODE_PROBLEM_DETECTOR_TAG="$NODE_PROBLEM_DETECTOR_TAG"
|
||||
NGINX_INGRESS_CONTROLLER_TAG="$NGINX_INGRESS_CONTROLLER_TAG"
|
||||
AUTO_HEALING_ENABLED="$AUTO_HEALING_ENABLED"
|
||||
AUTO_HEALING_CONTROLLER="$AUTO_HEALING_CONTROLLER"
|
||||
AUTO_SCALING_ENABLED="$AUTO_SCALING_ENABLED"
|
||||
DRAINO_TAG="$DRAINO_TAG"
|
||||
MAGNUM_AUTO_HEALER_TAG="$MAGNUM_AUTO_HEALER_TAG"
|
||||
AUTOSCALER_TAG="$AUTOSCALER_TAG"
|
||||
MIN_NODE_COUNT="$MIN_NODE_COUNT"
|
||||
MAX_NODE_COUNT="$MAX_NODE_COUNT"
|
||||
|
@ -55,6 +55,8 @@ KUBELET_OPTIONS="$KUBELET_OPTIONS"
|
||||
KUBEPROXY_OPTIONS="$KUBEPROXY_OPTIONS"
|
||||
OCTAVIA_ENABLED="$OCTAVIA_ENABLED"
|
||||
HEAT_CONTAINER_AGENT_TAG="$HEAT_CONTAINER_AGENT_TAG"
|
||||
AUTO_HEALING_ENABLED="$AUTO_HEALING_ENABLED"
|
||||
AUTO_HEALING_CONTROLLER="$AUTO_HEALING_CONTROLLER"
|
||||
EOF
|
||||
}
|
||||
|
||||
|
@ -140,6 +140,7 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
|
||||
'node_problem_detector_tag',
|
||||
'nginx_ingress_controller_tag',
|
||||
'auto_healing_enabled', 'auto_scaling_enabled',
|
||||
'auto_healing_controller', 'magnum_auto_healer_tag',
|
||||
'draino_tag', 'autoscaler_tag',
|
||||
'min_node_count', 'max_node_count', 'npd_enabled']
|
||||
|
||||
|
@ -610,6 +610,17 @@ parameters:
|
||||
default:
|
||||
false
|
||||
|
||||
auto_healing_controller:
|
||||
type: string
|
||||
description: >
|
||||
The service to be deployed for auto-healing.
|
||||
default: "draino"
|
||||
|
||||
magnum_auto_healer_tag:
|
||||
type: string
|
||||
description: tag of the magnum-auto-healer service.
|
||||
default: "v1.15.0"
|
||||
|
||||
auto_scaling_enabled:
|
||||
type: boolean
|
||||
description: >
|
||||
@ -949,6 +960,8 @@ resources:
|
||||
node_problem_detector_tag: {get_param: node_problem_detector_tag}
|
||||
nginx_ingress_controller_tag: {get_param: nginx_ingress_controller_tag}
|
||||
auto_healing_enabled: {get_param: auto_healing_enabled}
|
||||
auto_healing_controller: {get_param: auto_healing_controller}
|
||||
magnum_auto_healer_tag: {get_param: magnum_auto_healer_tag}
|
||||
auto_scaling_enabled: {get_param: auto_scaling_enabled}
|
||||
draino_tag: {get_param: draino_tag}
|
||||
autoscaler_tag: {get_param: autoscaler_tag}
|
||||
@ -1090,6 +1103,7 @@ resources:
|
||||
heat_container_agent_tag: {get_param: heat_container_agent_tag}
|
||||
auto_healing_enabled: {get_param: auto_healing_enabled}
|
||||
npd_enabled: {get_param: npd_enabled}
|
||||
auto_healing_controller: {get_param: auto_healing_controller}
|
||||
|
||||
outputs:
|
||||
|
||||
|
@ -457,6 +457,17 @@ parameters:
|
||||
description: >
|
||||
true if the auto healing feature should be enabled
|
||||
|
||||
auto_healing_controller:
|
||||
type: string
|
||||
description: >
|
||||
The service to be deployed for auto-healing.
|
||||
default: "draino"
|
||||
|
||||
magnum_auto_healer_tag:
|
||||
type: string
|
||||
description: tag of the magnum-auto-healer service.
|
||||
default: "v1.15.0"
|
||||
|
||||
auto_scaling_enabled:
|
||||
type: boolean
|
||||
description: >
|
||||
@ -635,6 +646,8 @@ resources:
|
||||
"$NODE_PROBLEM_DETECTOR_TAG": {get_param: node_problem_detector_tag}
|
||||
"$NGINX_INGRESS_CONTROLLER_TAG": {get_param: nginx_ingress_controller_tag}
|
||||
"$AUTO_HEALING_ENABLED": {get_param: auto_healing_enabled}
|
||||
"$AUTO_HEALING_CONTROLLER": {get_param: auto_healing_controller}
|
||||
"$MAGNUM_AUTO_HEALER_TAG": {get_param: magnum_auto_healer_tag}
|
||||
"$AUTO_SCALING_ENABLED": {get_param: auto_scaling_enabled}
|
||||
"$DRAINO_TAG": {get_param: draino_tag}
|
||||
"$AUTOSCALER_TAG": {get_param: autoscaler_tag}
|
||||
|
@ -281,6 +281,12 @@ parameters:
|
||||
description: >
|
||||
true if the auto healing feature should be enabled
|
||||
|
||||
auto_healing_controller:
|
||||
type: string
|
||||
description: >
|
||||
The service to be deployed for auto-healing.
|
||||
default: "draino"
|
||||
|
||||
npd_enabled:
|
||||
type: boolean
|
||||
description: >
|
||||
@ -373,6 +379,7 @@ resources:
|
||||
$OCTAVIA_ENABLED: {get_param: octavia_enabled}
|
||||
$HEAT_CONTAINER_AGENT_TAG: {get_param: heat_container_agent_tag}
|
||||
$AUTO_HEALING_ENABLED: {get_param: auto_healing_enabled}
|
||||
$AUTO_HEALING_CONTROLLER: {get_param: auto_healing_controller}
|
||||
$NPD_ENABLED: {get_param: npd_enabled}
|
||||
- get_file: ../../common/templates/kubernetes/fragments/write-kube-os-config.sh
|
||||
- get_file: ../../common/templates/kubernetes/fragments/make-cert-client.sh
|
||||
|
@ -526,6 +526,10 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
||||
'traefik_ingress_controller_tag')
|
||||
auto_healing_enabled = mock_cluster.labels.get(
|
||||
'auto_healing_enabled')
|
||||
auto_healing_controller = mock_cluster.labels.get(
|
||||
'auto_healing_controller')
|
||||
magnum_auto_healer_tag = mock_cluster.labels.get(
|
||||
'magnum_auto_healer_tag')
|
||||
auto_scaling_enabled = mock_cluster.labels.get(
|
||||
'auto_scaling_enabled')
|
||||
draino_tag = mock_cluster.labels.get('draino_tag')
|
||||
@ -602,6 +606,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
||||
'tiller_namespace': tiller_namespace,
|
||||
'node_problem_detector_tag': npd_tag,
|
||||
'auto_healing_enabled': auto_healing_enabled,
|
||||
'auto_healing_controller': auto_healing_controller,
|
||||
'magnum_auto_healer_tag': magnum_auto_healer_tag,
|
||||
'auto_scaling_enabled': auto_scaling_enabled,
|
||||
'draino_tag': draino_tag,
|
||||
'autoscaler_tag': autoscaler_tag,
|
||||
@ -936,6 +942,10 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
||||
'traefik_ingress_controller_tag')
|
||||
auto_healing_enabled = mock_cluster.labels.get(
|
||||
'auto_healing_enabled')
|
||||
auto_healing_controller = mock_cluster.labels.get(
|
||||
'auto_healing_controller')
|
||||
magnum_auto_healer_tag = mock_cluster.labels.get(
|
||||
'magnum_auto_healer_tag')
|
||||
auto_scaling_enabled = mock_cluster.labels.get(
|
||||
'auto_scaling_enabled')
|
||||
draino_tag = mock_cluster.labels.get('draino_tag')
|
||||
@ -1014,6 +1024,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
||||
'tiller_namespace': tiller_namespace,
|
||||
'node_problem_detector_tag': npd_tag,
|
||||
'auto_healing_enabled': auto_healing_enabled,
|
||||
'auto_healing_controller': auto_healing_controller,
|
||||
'magnum_auto_healer_tag': magnum_auto_healer_tag,
|
||||
'auto_scaling_enabled': auto_scaling_enabled,
|
||||
'draino_tag': draino_tag,
|
||||
'autoscaler_tag': autoscaler_tag,
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- A new label ``auto_healing_controller`` is introduced to allow the user to
|
||||
choose the auto-healing service when ``auto_healing_enabled`` is specified
|
||||
in the labels. ``draino`` and ``magnum-auto-healer`` are supported for now.
|
||||
Another label ``magnum_auto_healer_tag`` is also added to specify the
|
||||
``magnum-auto-healer`` image tag.
|
Loading…
Reference in New Issue
Block a user