diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst
index 8cb8c27db1..579cfb323e 100644
--- a/doc/source/user/index.rst
+++ b/doc/source/user/index.rst
@@ -392,6 +392,22 @@ the table are linked to more details elsewhere in the user guide.
 | `master_lb_floating_ip_enabled`_      | - true             | see below     |
 |                                       | - false            |               |
 +---------------------------------------+--------------------+---------------+
+| `auto_healing_enabled`_               | - true             | false         |
+|                                       | - false            |               |
++---------------------------------------+--------------------+---------------+
+| `auto_scaling_enabled`_               | - true             | true          |
+|                                       | - false            |               |
++---------------------------------------+--------------------+---------------+
+| `node_problem_detector_tag`_          | see below          | see below     |
++---------------------------------------+--------------------+---------------+
+| `draino_tag`_                         | see below          | see below     |
++---------------------------------------+--------------------+---------------+
+| `autoscaler_tag`_                     | see below          | see below     |
++---------------------------------------+--------------------+---------------+
+| `min_node_count`_                     | see below          | see below     |
++---------------------------------------+--------------------+---------------+
+| `max_node_count`_                     | see below          | see below     |
++---------------------------------------+--------------------+---------------+
 
 Cluster
 -------
@@ -1119,6 +1135,9 @@ _`container_infra_prefix`
     * quay.io/coreos/configmap-reload:v0.0.1
     * quay.io/coreos/prometheus-config-reloader:v0.26.0
     * quay.io/prometheus/prometheus:v2.5.0
+    * k8s.gcr.io/node-problem-detector:v0.6.2
+    * docker.io/planetlabs/draino:abf028a
+    * docker.io/openstackmagnum/cluster-autoscaler:v1.0
 
 _`kube_tag`
   This label allows users to select `a specific Kubernetes release,
@@ -1257,6 +1276,31 @@ _`master_lb_floating_ip_enabled`
   ``master_lb_enabled`` is set. If not specified, the default value is the
   same as template property ``floating_ip_enabled``.
 
+_`auto_healing_enabled`
+  If set to true, the auto healing feature will be enabled. Defaults to
+  false.
+
+_`auto_scaling_enabled`
+  If set to true, the auto scaling feature will be enabled. Defaults to
+  true.
+
+_`node_problem_detector_tag`
+  This label allows users to select a specific Node Problem Detector
+  version.
+
+_`draino_tag`
+  This label allows users to select a specific Draino version.
+
+_`autoscaler_tag`
+  This label allows users to select a specific Cluster Autoscaler version.
+
+_`min_node_count`
+  The minimum node count of the cluster when doing auto scaling or auto
+  healing. Defaults to 1.
+
+_`max_node_count`
+  The maximum node count of the cluster when doing auto scaling or auto
+  healing.
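For illustration only (not part of this patch), the new labels are passed at
cluster creation time like any other Magnum label. The cluster name, template
name, node count and label values below are placeholders, and the exact flag
spelling depends on your python-magnumclient version:

    openstack coe cluster create my-k8s-cluster \
        --cluster-template my-k8s-template \
        --node-count 2 \
        --labels auto_healing_enabled=true,auto_scaling_enabled=true,min_node_count=1,max_node_count=5

min_node_count and max_node_count bound the worker node group managed by the
cluster autoscaler (see the --nodes flag in enable-auto-scaling.sh below),
while auto_healing_enabled controls whether Draino is deployed and whether
worker nodes receive the draino-enabled=true kubelet label.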
+ + External load balancer for services ----------------------------------- diff --git a/magnum/drivers/common/templates/kubernetes/fragments/configure-kubernetes-minion.sh b/magnum/drivers/common/templates/kubernetes/fragments/configure-kubernetes-minion.sh index cbcfec50ae..33652090fc 100644 --- a/magnum/drivers/common/templates/kubernetes/fragments/configure-kubernetes-minion.sh +++ b/magnum/drivers/common/templates/kubernetes/fragments/configure-kubernetes-minion.sh @@ -154,6 +154,10 @@ KUBELET_ARGS="${KUBELET_ARGS} --client-ca-file=${CERT_DIR}/ca.crt --tls-cert-fil # specified cgroup driver KUBELET_ARGS="${KUBELET_ARGS} --cgroup-driver=${CGROUP_DRIVER}" +if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then + KUBELET_ARGS="${KUBELET_ARGS} --node-labels=draino-enabled=true" +fi + systemctl disable docker if cat /usr/lib/systemd/system/docker.service | grep 'native.cgroupdriver'; then cp /usr/lib/systemd/system/docker.service /etc/systemd/system/ diff --git a/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh b/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh index abb70867a9..7e9e9e0dcd 100644 --- a/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh +++ b/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh @@ -1,6 +1,6 @@ #!/bin/sh -step="enable-auto-healing" +step="enable-node-problem-detector" printf "Starting to run ${step}\n" . /etc/sysconfig/heat-params @@ -68,7 +68,7 @@ spec: - "/bin/sh" - "-c" # Pass both config to support both journald and syslog. - - "exec /node-problem-detector --logtostderr --system-log-monitors=/config/kernel-monitor.json,/config/kernel-monitor-filelog.json,/config/docker-monitor.json,/config/docker-monitor-filelog.json >>/var/log/node-problem-detector.log 2>&1" + - "exec /node-problem-detector --logtostderr --system-log-monitors=/config/kernel-monitor.json,/config/kernel-monitor-filelog.json,/config/docker-monitor.json,/config/docker-monitor-filelog.json 2>&1 | tee /var/log/node-problem-detector.log" securityContext: privileged: true resources: @@ -115,3 +115,109 @@ done kubectl apply -f ${NPD_DEPLOY} printf "Finished running ${step}\n" + +_docker_draino_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/planetlabs/} +step="enable-auto-healing" +printf "Starting to run ${step}\n" + +if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then + # Generate Draino manifest file + DRAINO_DEPLOY=/srv/magnum/kubernetes/manifests/draino.yaml + + [ -f ${DRAINO_DEPLOY} ] || { + echo "Writing File: $DRAINO_DEPLOY" + mkdir -p $(dirname ${DRAINO_DEPLOY}) + cat << EOF > ${DRAINO_DEPLOY} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: {component: draino} + name: draino + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: {component: draino} + name: draino +rules: +- apiGroups: [''] + resources: [events] + verbs: [create, patch, update] +- apiGroups: [''] + resources: [nodes] + verbs: [get, watch, list, update] +- apiGroups: [''] + resources: [nodes/status] + verbs: [patch] +- apiGroups: [''] + resources: [pods] + verbs: [get, watch, list] +- apiGroups: [''] + resources: [pods/eviction] + verbs: [create] +- apiGroups: [extensions] + resources: [daemonsets] + verbs: [get, watch, list] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: {component: draino} + name: draino +roleRef: {apiGroup: 
rbac.authorization.k8s.io, kind: ClusterRole, name: draino} +subjects: +- {kind: ServiceAccount, name: draino, namespace: kube-system} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: {component: draino} + name: draino + namespace: kube-system +spec: + # Draino does not currently support locking/master election, so you should + # only run one draino at a time. Draino won't start draining nodes immediately + # so it's usually safe for multiple drainos to exist for a brief period of + # time. + replicas: 1 + selector: + matchLabels: {component: draino} + template: + metadata: + labels: {component: draino} + name: draino + namespace: kube-system + spec: + nodeSelector: + node-role.kubernetes.io/master: "" + hostNetwork: true + tolerations: + - effect: NoSchedule + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + - effect: NoExecute + operator: Exists + - key: node.cloudprovider.kubernetes.io/uninitialized + value: "true" + effect: NoSchedule + - key: node-role.kubernetes.io/master + effect: NoSchedule + containers: + # You'll want to change these labels and conditions to suit your deployment. + - command: [/draino, --node-label=draino-enabled=true, --evict-daemonset-pods, --evict-emptydir-pods, NotReady] + image: ${_docker_draino_prefix}draino:${DRAINO_TAG} + livenessProbe: + httpGet: {path: /healthz, port: 10002} + initialDelaySeconds: 30 + name: draino + serviceAccountName: draino +EOF + } + + kubectl apply -f ${DRAINO_DEPLOY} + +fi +printf "Finished running ${step}\n" diff --git a/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-scaling.sh b/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-scaling.sh new file mode 100644 index 0000000000..d9e09e7ad6 --- /dev/null +++ b/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-scaling.sh @@ -0,0 +1,185 @@ +#!/bin/sh + +step="enable-auto-scaling" +printf "Starting to run ${step}\n" + +. 
/etc/sysconfig/heat-params
+
+_docker_ca_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}
+
+# Either auto scaling or auto healing requires the cluster autoscaler to be deployed
+if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ] || [ "$(echo $AUTO_SCALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then
+    # Generate Autoscaler manifest file
+    AUTOSCALER_DEPLOY=/srv/magnum/kubernetes/manifests/autoscaler.yaml
+
+    [ -f ${AUTOSCALER_DEPLOY} ] || {
+    echo "Writing File: $AUTOSCALER_DEPLOY"
+    mkdir -p $(dirname ${AUTOSCALER_DEPLOY})
+    cat << EOF > ${AUTOSCALER_DEPLOY}
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: cluster-autoscaler-role
+rules:
+  - apiGroups: [""]
+    resources: ["events", "endpoints"]
+    verbs: ["create", "patch"]
+  - apiGroups: [""]
+    resources: ["pods/eviction"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["pods/status"]
+    verbs: ["update"]
+  - apiGroups: [""]
+    resources: ["endpoints"]
+    resourceNames: ["cluster-autoscaler"]
+    verbs: ["get", "update"]
+  - apiGroups: [""]
+    resources: ["nodes"]
+    verbs: ["watch", "list", "get", "update"]
+  - apiGroups: [""]
+    resources:
+      - "pods"
+      - "services"
+      - "replicationcontrollers"
+      - "persistentvolumeclaims"
+      - "persistentvolumes"
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["batch"]
+    resources: ["jobs"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["policy"]
+    resources: ["poddisruptionbudgets"]
+    verbs: ["watch", "list"]
+  - apiGroups: ["apps"]
+    resources: ["daemonsets", "replicasets", "statefulsets"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["storage.k8s.io"]
+    resources: ["storageclasses"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    resourceNames: ["cluster-autoscaler-status"]
+    verbs: ["delete", "get", "update"]
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: cluster-autoscaler-rolebinding
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-autoscaler-role
+subjects:
+  - kind: ServiceAccount
+    name: cluster-autoscaler-account
+    namespace: kube-system
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: cluster-autoscaler-account
+  namespace: kube-system
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: cluster-autoscaler
+  namespace: kube-system
+  labels:
+    app: cluster-autoscaler
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: cluster-autoscaler
+  template:
+    metadata:
+      namespace: kube-system
+      labels:
+        app: cluster-autoscaler
+    spec:
+      nodeSelector:
+        node-role.kubernetes.io/master: ""
+      securityContext:
+        runAsUser: 1001
+      hostNetwork: True
+      tolerations:
+        - effect: NoSchedule
+          operator: Exists
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - effect: NoExecute
+          operator: Exists
+        - key: node.cloudprovider.kubernetes.io/uninitialized
+          value: "true"
+          effect: NoSchedule
+        - key: node-role.kubernetes.io/master
+          effect: NoSchedule
+      serviceAccountName: cluster-autoscaler-account
+      containers:
+        - name: cluster-autoscaler
+          image: ${_docker_ca_prefix}cluster-autoscaler:${AUTOSCALER_TAG}
+          imagePullPolicy: Always
+          command:
+            - ./cluster-autoscaler
+            - --alsologtostderr
+            - --cloud-provider=magnum
+            - --cluster-name=${CLUSTER_UUID}
+            - --cloud-config=/config/cloud-config
+            - --nodes=${MIN_NODE_COUNT}:${MAX_NODE_COUNT}:default-worker
+            - --scale-down-unneeded-time=10m
+            - 
--scale-down-delay-after-failure=3m + - --scale-down-delay-after-add=10m + volumeMounts: + - name: ca-bundle + mountPath: /etc/kubernetes + readOnly: true + - name: cloud-config + mountPath: /config + readOnly: true + volumes: + - name: ca-bundle + secret: + secretName: ca-bundle + - name: cloud-config + secret: + secretName: cluster-autoscaler-cloud-config +EOF + } + + echo "Waiting for Kubernetes API..." + until [ "ok" = "$(curl --silent http://127.0.0.1:8080/healthz)" ] + do + sleep 5 + done + + kubectl create secret generic ca-bundle --from-file=/etc/kubernetes/ca-bundle.crt -n kube-system + + cat < + true if the auto healing feature should be enabled + default: + false + + auto_scaling_enabled: + type: boolean + description: > + true if the auto scaling feature should be enabled + default: + false + node_problem_detector_tag: type: string description: tag of the node problem detector container @@ -575,6 +589,27 @@ parameters: description: nginx ingress controller docker image tag default: 0.23.0 + draino_tag: + type: string + description: tag of the draino container + default: abf028a + + autoscaler_tag: + type: string + description: tag of the autoscaler container + default: v1.0 + + min_node_count: + type: number + description: > + minimum node count of cluster workers when doing scale down + default: 1 + + max_node_count: + type: number + description: > + maximum node count of cluster workers when doing scale up + resources: ###################################################################### @@ -852,6 +887,12 @@ resources: tiller_namespace: {get_param: tiller_namespace} node_problem_detector_tag: {get_param: node_problem_detector_tag} nginx_ingress_controller_tag: {get_param: nginx_ingress_controller_tag} + auto_healing_enabled: {get_param: auto_healing_enabled} + auto_scaling_enabled: {get_param: auto_scaling_enabled} + draino_tag: {get_param: draino_tag} + autoscaler_tag: {get_param: autoscaler_tag} + min_node_count: {get_param: min_node_count} + max_node_count: {get_param: max_node_count} kube_cluster_config: type: OS::Heat::SoftwareConfig @@ -882,6 +923,7 @@ resources: - get_file: ../../common/templates/kubernetes/fragments/kube-dashboard-service.sh - get_file: ../../common/templates/kubernetes/fragments/enable-keystone-auth.sh - get_file: ../../common/templates/kubernetes/fragments/enable-auto-healing.sh + - get_file: ../../common/templates/kubernetes/fragments/enable-auto-scaling.sh # Helm Based Installation Configuration Scripts - get_file: ../../common/templates/kubernetes/helm/metrics-server.sh - str_replace: @@ -979,6 +1021,7 @@ resources: kubeproxy_options: {get_param: kubeproxy_options} octavia_enabled: {get_param: octavia_enabled} heat_container_agent_tag: {get_param: heat_container_agent_tag} + auto_healing_enabled: {get_param: auto_healing_enabled} outputs: diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml index e15fcb304b..b5ee5e7daa 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml @@ -439,6 +439,16 @@ parameters: type: string description: namespace where tiller will be installed + auto_healing_enabled: + type: boolean + description: > + true if the auto healing feature should be enabled + + auto_scaling_enabled: + type: boolean + description: > + true if the auto scaling feature should be enabled + node_problem_detector_tag: type: string description: tag of the node problem detector 
container @@ -447,6 +457,24 @@ parameters: type: string description: nginx ingress controller docker image tag + draino_tag: + type: string + description: tag of the draino container + + autoscaler_tag: + type: string + description: tag of the autoscaler container + + min_node_count: + type: number + description: > + minimum node count of cluster workers when doing scale down + + max_node_count: + type: number + description: > + maximum node count of cluster workers when doing scale up + resources: ###################################################################### # @@ -560,6 +588,12 @@ resources: "$TILLER_NAMESPACE": {get_param: tiller_namespace} "$NODE_PROBLEM_DETECTOR_TAG": {get_param: node_problem_detector_tag} "$NGINX_INGRESS_CONTROLLER_TAG": {get_param: nginx_ingress_controller_tag} + "$AUTO_HEALING_ENABLED": {get_param: auto_healing_enabled} + "$AUTO_SCALING_ENABLED": {get_param: auto_scaling_enabled} + "$DRAINO_TAG": {get_param: draino_tag} + "$AUTOSCALER_TAG": {get_param: autoscaler_tag} + "$MIN_NODE_COUNT": {get_param: min_node_count} + "$MAX_NODE_COUNT": {get_param: max_node_count} install_openstack_ca: type: OS::Heat::SoftwareConfig diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml index c7aa30cc12..087dc1b72b 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml @@ -276,6 +276,11 @@ parameters: type: string description: tag of the heat_container_agent system container + auto_healing_enabled: + type: boolean + description: > + true if the auto healing feature should be enabled + resources: start_container_agent: @@ -355,6 +360,8 @@ resources: $KUBEPROXY_OPTIONS: {get_param: kubeproxy_options} $OCTAVIA_ENABLED: {get_param: octavia_enabled} $HEAT_CONTAINER_AGENT_TAG: {get_param: heat_container_agent_tag} + $AUTO_HEALING_ENABLED: {get_param: auto_healing_enabled} + install_openstack_ca: type: OS::Heat::SoftwareConfig diff --git a/magnum/tests/unit/conductor/handlers/test_k8s_cluster_conductor.py b/magnum/tests/unit/conductor/handlers/test_k8s_cluster_conductor.py index 741d556778..091710c276 100644 --- a/magnum/tests/unit/conductor/handlers/test_k8s_cluster_conductor.py +++ b/magnum/tests/unit/conductor/handlers/test_k8s_cluster_conductor.py @@ -331,11 +331,15 @@ class TestClusterConductorWithK8s(base.TestCase): 'kube_service_account_key': 'public_key', 'kube_service_account_private_key': 'private_key', 'portal_network_cidr': '10.254.0.0/16', - 'project_id': 'project_id' + 'project_id': 'project_id', + 'max_node_count': 2, } if missing_attr is not None: expected.pop(mapping[missing_attr], None) + if missing_attr == 'node_count': + expected['max_node_count'] = None + self.assertEqual(expected, definition) self.assertEqual( ['../../common/templates/environments/no_private_network.yaml', @@ -459,7 +463,8 @@ class TestClusterConductorWithK8s(base.TestCase): 'kube_service_account_key': 'public_key', 'kube_service_account_private_key': 'private_key', 'portal_network_cidr': '10.254.0.0/16', - 'project_id': 'project_id' + 'project_id': 'project_id', + 'max_node_count': 2, } self.assertEqual(expected, definition) @@ -574,7 +579,8 @@ class TestClusterConductorWithK8s(base.TestCase): 'kube_service_account_key': 'public_key', 'kube_service_account_private_key': 'private_key', 'portal_network_cidr': '10.254.0.0/16', - 'project_id': 'project_id' + 'project_id': 'project_id', + 'max_node_count': 2, } 
self.assertEqual(expected, definition) self.assertEqual( @@ -1000,7 +1006,8 @@ class TestClusterConductorWithK8s(base.TestCase): 'kube_service_account_key': 'public_key', 'kube_service_account_private_key': 'private_key', 'portal_network_cidr': '10.254.0.0/16', - 'project_id': 'project_id' + 'project_id': 'project_id', + 'max_node_count': 2, } self.assertEqual(expected, definition) self.assertEqual( diff --git a/magnum/tests/unit/drivers/test_template_definition.py b/magnum/tests/unit/drivers/test_template_definition.py index 27da372157..a9f18c93c4 100644 --- a/magnum/tests/unit/drivers/test_template_definition.py +++ b/magnum/tests/unit/drivers/test_template_definition.py @@ -517,6 +517,14 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): tiller_namespace = mock_cluster.labels.get( 'tiller_namespace') npd_tag = mock_cluster.labels.get('node_problem_detector_tag') + auto_healing_enabled = mock_cluster.labels.get( + 'auto_healing_enabled') + auto_scaling_enabled = mock_cluster.labels.get( + 'auto_scaling_enabled') + draino_tag = mock_cluster.labels.get('draino_tag') + autoscaler_tag = mock_cluster.labels.get('autoscaler_tag') + min_node_count = mock_cluster.labels.get('min_node_count') + max_node_count = mock_cluster.labels.get('max_node_count') k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition() @@ -581,6 +589,12 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'tiller_tag': tiller_tag, 'tiller_namespace': tiller_namespace, 'node_problem_detector_tag': npd_tag, + 'auto_healing_enabled': auto_healing_enabled, + 'auto_scaling_enabled': auto_scaling_enabled, + 'draino_tag': draino_tag, + 'autoscaler_tag': autoscaler_tag, + 'min_node_count': min_node_count, + 'max_node_count': max_node_count, }} mock_get_params.assert_called_once_with(mock_context, mock_cluster_template, @@ -893,6 +907,14 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): tiller_namespace = mock_cluster.labels.get( 'tiller_namespace') npd_tag = mock_cluster.labels.get('node_problem_detector_tag') + auto_healing_enabled = mock_cluster.labels.get( + 'auto_healing_enabled') + auto_scaling_enabled = mock_cluster.labels.get( + 'auto_scaling_enabled') + draino_tag = mock_cluster.labels.get('draino_tag') + autoscaler_tag = mock_cluster.labels.get('autoscaler_tag') + min_node_count = mock_cluster.labels.get('min_node_count') + max_node_count = mock_cluster.labels.get('max_node_count') k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition() @@ -959,6 +981,12 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'tiller_tag': tiller_tag, 'tiller_namespace': tiller_namespace, 'node_problem_detector_tag': npd_tag, + 'auto_healing_enabled': auto_healing_enabled, + 'auto_scaling_enabled': auto_scaling_enabled, + 'draino_tag': draino_tag, + 'autoscaler_tag': autoscaler_tag, + 'min_node_count': min_node_count, + 'max_node_count': max_node_count, }} mock_get_params.assert_called_once_with(mock_context, mock_cluster_template, diff --git a/releasenotes/notes/support-auto-healing-3e07c16c55209b0a.yaml b/releasenotes/notes/support-auto-healing-3e07c16c55209b0a.yaml new file mode 100644 index 0000000000..5eb1f58545 --- /dev/null +++ b/releasenotes/notes/support-auto-healing-3e07c16c55209b0a.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Using Node Problem Detector, Draino and AutoScaler to support + auto healing for K8s cluster, user can use a new label + "auto_healing_enabled' to turn on/off it. 
+
+    Meanwhile, a new label "auto_scaling_enabled" is also introduced
+    to let the k8s cluster auto scale based on its workload.
+
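As a quick post-deployment sanity check (a sketch, not part of the patch;
the resource names follow the manifests above and the output will vary per
cluster):

    # Run on a master node, or anywhere with a kubeconfig for the cluster.
    kubectl -n kube-system get deploy draino cluster-autoscaler
    # Only workers carrying the label set by the minion fragment are drained:
    kubectl get nodes -l draino-enabled=true
    # Scale-up/scale-down decisions, if any, appear in the autoscaler logs:
    kubectl -n kube-system logs deploy/cluster-autoscaler | grep -i scale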