[fedora_atomic] Support auto healing for k8s

Use Node Problem Detector, Draino and the Cluster Autoscaler to
support auto healing for k8s clusters. Users can turn the feature
on or off with a new label "auto_healing_enabled".

Meanwhile, a new label "auto_scaling_enabled" is also introduced
to allow the k8s cluster to auto scale based on its workload.

Task: 28923
Story: 2004782

Change-Id: I25af2a72a7a960205929374d2300bd83d4d20960
Feilong Wang 2019-01-17 15:47:55 +13:00
parent f194b5b7fa
commit 75fab6ff37
13 changed files with 502 additions and 20 deletions

View File

@ -392,6 +392,22 @@ the table are linked to more details elsewhere in the user guide.
| `master_lb_floating_ip_enabled`_ | - true | see below |
| | - false | |
+---------------------------------------+--------------------+---------------+
| `auto_healing_enabled`_ | - true | false |
| | - false | |
+---------------------------------------+--------------------+---------------+
| `auto_scaling_enabled`_ | - true | true |
| | - false | |
+---------------------------------------+--------------------+---------------+
| `node_problem_detector_tag`_ | see below | see below |
+---------------------------------------+--------------------+---------------+
| `draino_tag`_ | see below | see below |
+---------------------------------------+--------------------+---------------+
| `autoscaler_tag`_ | see below | see below |
+---------------------------------------+--------------------+---------------+
| `min_node_count`_ | see below | see below |
+---------------------------------------+--------------------+---------------+
| `max_node_count`_ | see below | see below |
+---------------------------------------+--------------------+---------------+
Cluster
-------
@ -1119,6 +1135,9 @@ _`container_infra_prefix`
* quay.io/coreos/configmap-reload:v0.0.1
* quay.io/coreos/prometheus-config-reloader:v0.26.0
* quay.io/prometheus/prometheus:v2.5.0
* k8s.gcr.io/node-problem-detector:v0.6.2
* docker.io/planetlabs/draino:abf028a
* docker.io/openstackmagnum/cluster-autoscaler:v1.0
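If ``container_infra_prefix`` points to a private registry, the three images added above must also be pushed there. A minimal sketch of mirroring one of them, where ``registry.example.com/magnum/`` is only a placeholder prefix::

    docker pull k8s.gcr.io/node-problem-detector:v0.6.2
    docker tag k8s.gcr.io/node-problem-detector:v0.6.2 registry.example.com/magnum/node-problem-detector:v0.6.2
    docker push registry.example.com/magnum/node-problem-detector:v0.6.2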
_`kube_tag`
This label allows users to select `a specific Kubernetes release,
@ -1257,6 +1276,31 @@ _`master_lb_floating_ip_enabled`
``master_lb_enabled`` is set. If not specified, the default value is the same
as template property ``floating_ip_enabled``.
_`auto_healing_enabled`
If set to true, the auto healing feature will be enabled. Defaults to false.
_`auto_scaling_enabled`
If set to true, the auto scaling feature will be enabled. Defaults to true.
_`node_problem_detector_tag`
This label allows users to select a specific Node Problem Detector
version.
_`draino_tag`
This label allows users to select a specific Draino version.
_`autoscaler_tag`
This label allows users to select a specific Cluster Autoscaler version.
_`min_node_count`
The minimum node count of the cluster when doing auto scaling or auto
healing. Defaults to 1.
_`max_node_count`
The maximum node count of the cluster when doing auto scaling or auto
healing.
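As a rough end-to-end sketch (the cluster and template names are placeholders), the labels above can be supplied at cluster creation time with the OpenStack CLI::

    openstack coe cluster create my-cluster \
        --cluster-template k8s-fedora-atomic \
        --node-count 2 \
        --labels auto_healing_enabled=true,auto_scaling_enabled=true,min_node_count=1,max_node_count=5

If ``max_node_count`` is not given, Magnum defaults it to the initial node count plus one.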
External load balancer for services
-----------------------------------

View File

@ -154,6 +154,10 @@ KUBELET_ARGS="${KUBELET_ARGS} --client-ca-file=${CERT_DIR}/ca.crt --tls-cert-fil
# specified cgroup driver
KUBELET_ARGS="${KUBELET_ARGS} --cgroup-driver=${CGROUP_DRIVER}"
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then
KUBELET_ARGS="${KUBELET_ARGS} --node-labels=draino-enabled=true"
fi
systemctl disable docker
if cat /usr/lib/systemd/system/docker.service | grep 'native.cgroupdriver'; then
cp /usr/lib/systemd/system/docker.service /etc/systemd/system/
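Since the --node-labels=draino-enabled=true flag added above is what makes a worker eligible for draining, a quick check from the master (assuming kubectl is already configured) is to list the nodes carrying that label:

# Only workers started with the kubelet flag above should show up here.
kubectl get nodes -l draino-enabled=true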

View File

@ -1,6 +1,6 @@
#!/bin/sh
step="enable-auto-healing"
step="enable-node-problem-detector"
printf "Starting to run ${step}\n"
. /etc/sysconfig/heat-params
@ -68,7 +68,7 @@ spec:
- "/bin/sh"
- "-c"
# Pass both config to support both journald and syslog.
- "exec /node-problem-detector --logtostderr --system-log-monitors=/config/kernel-monitor.json,/config/kernel-monitor-filelog.json,/config/docker-monitor.json,/config/docker-monitor-filelog.json >>/var/log/node-problem-detector.log 2>&1"
- "exec /node-problem-detector --logtostderr --system-log-monitors=/config/kernel-monitor.json,/config/kernel-monitor-filelog.json,/config/docker-monitor.json,/config/docker-monitor-filelog.json 2>&1 | tee /var/log/node-problem-detector.log"
securityContext:
privileged: true
resources:
@ -115,3 +115,109 @@ done
kubectl apply -f ${NPD_DEPLOY}
printf "Finished running ${step}\n"
_docker_draino_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/planetlabs/}
step="enable-auto-healing"
printf "Starting to run ${step}\n"
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then
# Generate Draino manifest file
DRAINO_DEPLOY=/srv/magnum/kubernetes/manifests/draino.yaml
[ -f ${DRAINO_DEPLOY} ] || {
echo "Writing File: $DRAINO_DEPLOY"
mkdir -p $(dirname ${DRAINO_DEPLOY})
cat << EOF > ${DRAINO_DEPLOY}
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels: {component: draino}
name: draino
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels: {component: draino}
name: draino
rules:
- apiGroups: ['']
resources: [events]
verbs: [create, patch, update]
- apiGroups: ['']
resources: [nodes]
verbs: [get, watch, list, update]
- apiGroups: ['']
resources: [nodes/status]
verbs: [patch]
- apiGroups: ['']
resources: [pods]
verbs: [get, watch, list]
- apiGroups: ['']
resources: [pods/eviction]
verbs: [create]
- apiGroups: [extensions]
resources: [daemonsets]
verbs: [get, watch, list]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels: {component: draino}
name: draino
roleRef: {apiGroup: rbac.authorization.k8s.io, kind: ClusterRole, name: draino}
subjects:
- {kind: ServiceAccount, name: draino, namespace: kube-system}
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels: {component: draino}
name: draino
namespace: kube-system
spec:
# Draino does not currently support locking/master election, so you should
# only run one draino at a time. Draino won't start draining nodes immediately
# so it's usually safe for multiple drainos to exist for a brief period of
# time.
replicas: 1
selector:
matchLabels: {component: draino}
template:
metadata:
labels: {component: draino}
name: draino
namespace: kube-system
spec:
nodeSelector:
node-role.kubernetes.io/master: ""
hostNetwork: true
tolerations:
- effect: NoSchedule
operator: Exists
- key: CriticalAddonsOnly
operator: Exists
- effect: NoExecute
operator: Exists
- key: node.cloudprovider.kubernetes.io/uninitialized
value: "true"
effect: NoSchedule
- key: node-role.kubernetes.io/master
effect: NoSchedule
containers:
# You'll want to change these labels and conditions to suit your deployment.
- command: [/draino, --node-label=draino-enabled=true, --evict-daemonset-pods, --evict-emptydir-pods, NotReady]
image: ${_docker_draino_prefix}draino:${DRAINO_TAG}
livenessProbe:
httpGet: {path: /healthz, port: 10002}
initialDelaySeconds: 30
name: draino
serviceAccountName: draino
EOF
}
kubectl apply -f ${DRAINO_DEPLOY}
fi
printf "Finished running ${step}\n"

View File

@ -0,0 +1,185 @@
#!/bin/sh
step="enable-auto-scaling"
printf "Starting to run ${step}\n"
. /etc/sysconfig/heat-params
_docker_ca_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}
# For either auto scaling or auto healing, the cluster autoscaler (CA) needs to be deployed
if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" || "$(echo $AUTO_SCALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true"]; then
# Generate Autoscaler manifest file
AUTOSCALER_DEPLOY=/srv/magnum/kubernetes/manifests/autoscaler.yaml
[ -f ${AUTOSCALER_DEPLOY} ] || {
echo "Writing File: $AUTOSCALER_DEPLOY"
mkdir -p $(dirname ${AUTOSCALER_DEPLOY})
cat << EOF > ${AUTOSCALER_DEPLOY}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: cluster-autoscaler-role
rules:
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources:
- "pods"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
- "persistentvolumes"
verbs: ["watch", "list", "get"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["watch", "list"]
- apiGroups: ["apps"]
resources: ["daemonsets", "replicasets", "statefulsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["watch", "list", "get"]
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create"]
- apiGroups: [""]
resources: ["configmaps"]
resourceNames: ["cluster-autoscaler-status"]
verbs: ["delete", "get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: cluster-autoscaler-rolebinding
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-autoscaler-role
subjects:
- kind: ServiceAccount
name: cluster-autoscaler-account
namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: cluster-autoscaler-account
namespace: kube-system
---
kind: Deployment
apiVersion: apps/v1
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
app: cluster-autoscaler
spec:
replicas: 1
selector:
matchLabels:
app: cluster-autoscaler
template:
metadata:
namespace: kube-system
labels:
app: cluster-autoscaler
spec:
nodeSelector:
node-role.kubernetes.io/master: ""
securityContext:
runAsUser: 1001
hostNetwork: True
tolerations:
- effect: NoSchedule
operator: Exists
- key: CriticalAddonsOnly
operator: Exists
- effect: NoExecute
operator: Exists
- key: node.cloudprovider.kubernetes.io/uninitialized
value: "true"
effect: NoSchedule
- key: node-role.kubernetes.io/master
effect: NoSchedule
serviceAccountName: cluster-autoscaler-account
containers:
- name: cluster-autoscaler
image: ${_docker_ca_prefix}cluster-autoscaler:${AUTOSCALER_TAG}
imagePullPolicy: Always
command:
- ./cluster-autoscaler
- --alsologtostderr
- --cloud-provider=magnum
- --cluster-name=${CLUSTER_UUID}
- --cloud-config=/config/cloud-config
- --nodes=${MIN_NODE_COUNT}:${MAX_NODE_COUNT}:default-worker
- --scale-down-unneeded-time=10m
- --scale-down-delay-after-failure=3m
- --scale-down-delay-after-add=10m
volumeMounts:
- name: ca-bundle
mountPath: /etc/kubernetes
readOnly: true
- name: cloud-config
mountPath: /config
readOnly: true
volumes:
- name: ca-bundle
secret:
secretName: ca-bundle
- name: cloud-config
secret:
secretName: cluster-autoscaler-cloud-config
EOF
}
echo "Waiting for Kubernetes API..."
until [ "ok" = "$(curl --silent http://127.0.0.1:8080/healthz)" ]
do
sleep 5
done
kubectl create secret generic ca-bundle --from-file=/etc/kubernetes/ca-bundle.crt -n kube-system
cat <<EOF | kubectl apply -f -
---
apiVersion: v1
kind: Secret
metadata:
name: cluster-autoscaler-cloud-config
namespace: kube-system
type: Opaque
stringData:
cloud-config: |-
[Global]
auth-url=$AUTH_URL
user-id=$TRUSTEE_USER_ID
password=$TRUSTEE_PASSWORD
trust-id=$TRUST_ID
region=$REGION_NAME
ca-file=/etc/kubernetes/ca-bundle.crt
EOF
kubectl apply -f ${AUTOSCALER_DEPLOY}
fi
printf "Finished running ${step}\n"

View File

@ -90,3 +90,9 @@ write_files:
TILLER_NAMESPACE="$TILLER_NAMESPACE"
NODE_PROBLEM_DETECTOR_TAG="$NODE_PROBLEM_DETECTOR_TAG"
NGINX_INGRESS_CONTROLLER_TAG="$NGINX_INGRESS_CONTROLLER_TAG"
AUTO_HEALING_ENABLED="$AUTO_HEALING_ENABLED"
AUTO_SCALING_ENABLED="$AUTO_SCALING_ENABLED"
DRAINO_TAG="$DRAINO_TAG"
AUTOSCALER_TAG="$AUTOSCALER_TAG"
MIN_NODE_COUNT="$MIN_NODE_COUNT"
MAX_NODE_COUNT="$MAX_NODE_COUNT"

View File

@ -53,3 +53,4 @@ write_files:
KUBEPROXY_OPTIONS="$KUBEPROXY_OPTIONS"
OCTAVIA_ENABLED="$OCTAVIA_ENABLED"
HEAT_CONTAINER_AGENT_TAG="$HEAT_CONTAINER_AGENT_TAG"
AUTO_HEALING_ENABLED="$AUTO_HEALING_ENABLED"

View File

@ -116,15 +116,12 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
'to be true or unset.'))
label_list = ['kube_tag', 'container_infra_prefix',
'availability_zone',
'cgroup_driver',
'availability_zone', 'cgroup_driver',
'calico_tag', 'calico_cni_tag',
'calico_kube_controllers_tag', 'calico_ipv4pool',
'etcd_tag', 'flannel_tag', 'flannel_cni_tag',
'cloud_provider_enabled',
'cloud_provider_tag',
'prometheus_tag',
'grafana_tag',
'cloud_provider_enabled', 'cloud_provider_tag',
'prometheus_tag', 'grafana_tag',
'heat_container_agent_tag',
'keystone_auth_enabled', 'k8s_keystone_auth_tag',
'monitoring_enabled',
@ -132,7 +129,10 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
'tiller_tag',
'tiller_namespace',
'node_problem_detector_tag',
'nginx_ingress_controller_tag']
'nginx_ingress_controller_tag',
'auto_healing_enabled', 'auto_scaling_enabled',
'draino_tag', 'autoscaler_tag',
'min_node_count', 'max_node_count']
for label in label_list:
label_value = cluster.labels.get(label)
@ -146,6 +146,19 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
extra_params['kube_service_account_private_key'] = \
csr_keys["private_key"].replace("\n", "\\n")
extra_params['project_id'] = cluster.project_id
if not extra_params.get('max_node_count'):
extra_params['max_node_count'] = cluster.node_count + 1
self._set_cert_manager_params(cluster, extra_params)
return super(K8sFedoraTemplateDefinition,
self).get_params(context, cluster_template, cluster,
extra_params=extra_params,
**kwargs)
def _set_cert_manager_params(self, cluster, extra_params):
cert_manager_api = cluster.labels.get('cert_manager_api')
if strutils.bool_from_string(cert_manager_api):
extra_params['cert_manager_api'] = cert_manager_api
@ -161,13 +174,6 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
ca_cert.get_private_key(),
ca_cert.get_private_key_passphrase()).replace("\n", "\\n")
extra_params['project_id'] = cluster.project_id
return super(K8sFedoraTemplateDefinition,
self).get_params(context, cluster_template, cluster,
extra_params=extra_params,
**kwargs)
def get_env_files(self, cluster_template, cluster):
env_files = []

View File

@ -565,6 +565,20 @@ parameters:
description: namespace where tiller will be installed.
default: "magnum-tiller"
auto_healing_enabled:
type: boolean
description: >
true if the auto healing feature should be enabled
default:
false
auto_scaling_enabled:
type: boolean
description: >
true if the auto scaling feature should be enabled
default:
false
node_problem_detector_tag:
type: string
description: tag of the node problem detector container
@ -575,6 +589,27 @@ parameters:
description: nginx ingress controller docker image tag
default: 0.23.0
draino_tag:
type: string
description: tag of the draino container
default: abf028a
autoscaler_tag:
type: string
description: tag of the autoscaler container
default: v1.0
min_node_count:
type: number
description: >
minimum node count of cluster workers when doing scale down
default: 1
max_node_count:
type: number
description: >
maximum node count of cluster workers when doing scale up
resources:
######################################################################
@ -852,6 +887,12 @@ resources:
tiller_namespace: {get_param: tiller_namespace}
node_problem_detector_tag: {get_param: node_problem_detector_tag}
nginx_ingress_controller_tag: {get_param: nginx_ingress_controller_tag}
auto_healing_enabled: {get_param: auto_healing_enabled}
auto_scaling_enabled: {get_param: auto_scaling_enabled}
draino_tag: {get_param: draino_tag}
autoscaler_tag: {get_param: autoscaler_tag}
min_node_count: {get_param: min_node_count}
max_node_count: {get_param: max_node_count}
kube_cluster_config:
type: OS::Heat::SoftwareConfig
@ -882,6 +923,7 @@ resources:
- get_file: ../../common/templates/kubernetes/fragments/kube-dashboard-service.sh
- get_file: ../../common/templates/kubernetes/fragments/enable-keystone-auth.sh
- get_file: ../../common/templates/kubernetes/fragments/enable-auto-healing.sh
- get_file: ../../common/templates/kubernetes/fragments/enable-auto-scaling.sh
# Helm Based Installation Configuration Scripts
- get_file: ../../common/templates/kubernetes/helm/metrics-server.sh
- str_replace:
@ -979,6 +1021,7 @@ resources:
kubeproxy_options: {get_param: kubeproxy_options}
octavia_enabled: {get_param: octavia_enabled}
heat_container_agent_tag: {get_param: heat_container_agent_tag}
auto_healing_enabled: {get_param: auto_healing_enabled}
outputs:

View File

@ -439,6 +439,16 @@ parameters:
type: string
description: namespace where tiller will be installed
auto_healing_enabled:
type: boolean
description: >
true if the auto healing feature should be enabled
auto_scaling_enabled:
type: boolean
description: >
true if the auto scaling feature should be enabled
node_problem_detector_tag:
type: string
description: tag of the node problem detector container
@ -447,6 +457,24 @@ parameters:
type: string
description: nginx ingress controller docker image tag
draino_tag:
type: string
description: tag of the draino container
autoscaler_tag:
type: string
description: tag of the autoscaler container
min_node_count:
type: number
description: >
minimum node count of cluster workers when doing scale down
max_node_count:
type: number
description: >
maximum node count of cluster workers when doing scale up
resources:
######################################################################
#
@ -560,6 +588,12 @@ resources:
"$TILLER_NAMESPACE": {get_param: tiller_namespace}
"$NODE_PROBLEM_DETECTOR_TAG": {get_param: node_problem_detector_tag}
"$NGINX_INGRESS_CONTROLLER_TAG": {get_param: nginx_ingress_controller_tag}
"$AUTO_HEALING_ENABLED": {get_param: auto_healing_enabled}
"$AUTO_SCALING_ENABLED": {get_param: auto_scaling_enabled}
"$DRAINO_TAG": {get_param: draino_tag}
"$AUTOSCALER_TAG": {get_param: autoscaler_tag}
"$MIN_NODE_COUNT": {get_param: min_node_count}
"$MAX_NODE_COUNT": {get_param: max_node_count}
install_openstack_ca:
type: OS::Heat::SoftwareConfig

View File

@ -276,6 +276,11 @@ parameters:
type: string
description: tag of the heat_container_agent system container
auto_healing_enabled:
type: boolean
description: >
true if the auto healing feature should be enabled
resources:
start_container_agent:
@ -355,6 +360,8 @@ resources:
$KUBEPROXY_OPTIONS: {get_param: kubeproxy_options}
$OCTAVIA_ENABLED: {get_param: octavia_enabled}
$HEAT_CONTAINER_AGENT_TAG: {get_param: heat_container_agent_tag}
$AUTO_HEALING_ENABLED: {get_param: auto_healing_enabled}
install_openstack_ca:
type: OS::Heat::SoftwareConfig

View File

@ -331,11 +331,15 @@ class TestClusterConductorWithK8s(base.TestCase):
'kube_service_account_key': 'public_key',
'kube_service_account_private_key': 'private_key',
'portal_network_cidr': '10.254.0.0/16',
'project_id': 'project_id'
'project_id': 'project_id',
'max_node_count': 2,
}
if missing_attr is not None:
expected.pop(mapping[missing_attr], None)
if missing_attr == 'node_count':
expected['max_node_count'] = None
self.assertEqual(expected, definition)
self.assertEqual(
['../../common/templates/environments/no_private_network.yaml',
@ -459,7 +463,8 @@ class TestClusterConductorWithK8s(base.TestCase):
'kube_service_account_key': 'public_key',
'kube_service_account_private_key': 'private_key',
'portal_network_cidr': '10.254.0.0/16',
'project_id': 'project_id'
'project_id': 'project_id',
'max_node_count': 2,
}
self.assertEqual(expected, definition)
@ -574,7 +579,8 @@ class TestClusterConductorWithK8s(base.TestCase):
'kube_service_account_key': 'public_key',
'kube_service_account_private_key': 'private_key',
'portal_network_cidr': '10.254.0.0/16',
'project_id': 'project_id'
'project_id': 'project_id',
'max_node_count': 2,
}
self.assertEqual(expected, definition)
self.assertEqual(
@ -1000,7 +1006,8 @@ class TestClusterConductorWithK8s(base.TestCase):
'kube_service_account_key': 'public_key',
'kube_service_account_private_key': 'private_key',
'portal_network_cidr': '10.254.0.0/16',
'project_id': 'project_id'
'project_id': 'project_id',
'max_node_count': 2,
}
self.assertEqual(expected, definition)
self.assertEqual(

View File

@ -517,6 +517,14 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
tiller_namespace = mock_cluster.labels.get(
'tiller_namespace')
npd_tag = mock_cluster.labels.get('node_problem_detector_tag')
auto_healing_enabled = mock_cluster.labels.get(
'auto_healing_enabled')
auto_scaling_enabled = mock_cluster.labels.get(
'auto_scaling_enabled')
draino_tag = mock_cluster.labels.get('draino_tag')
autoscaler_tag = mock_cluster.labels.get('autoscaler_tag')
min_node_count = mock_cluster.labels.get('min_node_count')
max_node_count = mock_cluster.labels.get('max_node_count')
k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition()
@ -581,6 +589,12 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
'tiller_tag': tiller_tag,
'tiller_namespace': tiller_namespace,
'node_problem_detector_tag': npd_tag,
'auto_healing_enabled': auto_healing_enabled,
'auto_scaling_enabled': auto_scaling_enabled,
'draino_tag': draino_tag,
'autoscaler_tag': autoscaler_tag,
'min_node_count': min_node_count,
'max_node_count': max_node_count,
}}
mock_get_params.assert_called_once_with(mock_context,
mock_cluster_template,
@ -893,6 +907,14 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
tiller_namespace = mock_cluster.labels.get(
'tiller_namespace')
npd_tag = mock_cluster.labels.get('node_problem_detector_tag')
auto_healing_enabled = mock_cluster.labels.get(
'auto_healing_enabled')
auto_scaling_enabled = mock_cluster.labels.get(
'auto_scaling_enabled')
draino_tag = mock_cluster.labels.get('draino_tag')
autoscaler_tag = mock_cluster.labels.get('autoscaler_tag')
min_node_count = mock_cluster.labels.get('min_node_count')
max_node_count = mock_cluster.labels.get('max_node_count')
k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition()
@ -959,6 +981,12 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
'tiller_tag': tiller_tag,
'tiller_namespace': tiller_namespace,
'node_problem_detector_tag': npd_tag,
'auto_healing_enabled': auto_healing_enabled,
'auto_scaling_enabled': auto_scaling_enabled,
'draino_tag': draino_tag,
'autoscaler_tag': autoscaler_tag,
'min_node_count': min_node_count,
'max_node_count': max_node_count,
}}
mock_get_params.assert_called_once_with(mock_context,
mock_cluster_template,

View File

@ -0,0 +1,11 @@
---
features:
- |
Using Node Problem Detector, Draino and the Cluster Autoscaler,
auto healing is now supported for k8s clusters. Users can turn
the feature on or off with a new label "auto_healing_enabled".
Meanwhile, a new label "auto_scaling_enabled" is also introduced
to allow the k8s cluster to auto scale based on its workload.