diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst index 5315193920..79a36de6c3 100644 --- a/doc/source/user/index.rst +++ b/doc/source/user/index.rst @@ -416,6 +416,9 @@ the table are linked to more details elsewhere in the user guide. +---------------------------------------+--------------------+---------------+ | `max_node_count`_ | see below | see below | +---------------------------------------+--------------------+---------------+ +| `npd_enabled`_ | - true | true | +| | - false | | ++---------------------------------------+--------------------+---------------+ Cluster ------- @@ -1327,6 +1330,10 @@ _`max_node_count` The maxmium node count of the cluster when doing auto scaling or auto healing. +_`npd_enabled` + Set Node Problem Detector service enabled or disabled. Default enabled. + + External load balancer for services ----------------------------------- diff --git a/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh b/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh index 7e9e9e0dcd..1e95c995a7 100644 --- a/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh +++ b/magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh @@ -7,13 +7,16 @@ printf "Starting to run ${step}\n" _gcr_prefix=${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/} -# Generate Node Problem Detector manifest file -NPD_DEPLOY=/srv/magnum/kubernetes/manifests/npd.yaml +# Deploy Node Problem Detector when either auto healing or NPD itself is enabled +if [ "$(echo $AUTO_HEALING_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ] || [ "$(echo $NPD_ENABLED | tr '[:upper:]' '[:lower:]')" = "true" ]; then + # Generate Node Problem Detector manifest file + NPD_DEPLOY=/srv/magnum/kubernetes/manifests/npd.yaml -[ -f ${NPD_DEPLOY} ] || { - echo "Writing File: $NPD_DEPLOY" - mkdir -p $(dirname ${NPD_DEPLOY}) - cat << EOF > ${NPD_DEPLOY} + [ -f ${NPD_DEPLOY} ] || { + echo "Writing File: $NPD_DEPLOY" + mkdir -p $(dirname 
${NPD_DEPLOY}) + cat << EOF > ${NPD_DEPLOY} +--- apiVersion: v1 kind: ServiceAccount metadata: @@ -104,17 +107,19 @@ spec: - key: "CriticalAddonsOnly" operator: "Exists" EOF -} + } -echo "Waiting for Kubernetes API..." -until [ "ok" = "$(curl --silent http://127.0.0.1:8080/healthz)" ] -do - sleep 5 -done + echo "Waiting for Kubernetes API..." + until [ "ok" = "$(curl --silent http://127.0.0.1:8080/healthz)" ] + do + sleep 5 + done -kubectl apply -f ${NPD_DEPLOY} + kubectl apply -f ${NPD_DEPLOY} + + printf "Finished running ${step}\n" +fi -printf "Finished running ${step}\n" _docker_draino_prefix=${CONTAINER_INFRA_PREFIX:-docker.io/planetlabs/} step="enable-auto-healing" diff --git a/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh b/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh index 11cc6f897a..2aa6008498 100644 --- a/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh +++ b/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh @@ -101,6 +101,7 @@ HEAT_PARAMS=/etc/sysconfig/heat-params AUTOSCALER_TAG="$AUTOSCALER_TAG" MIN_NODE_COUNT="$MIN_NODE_COUNT" MAX_NODE_COUNT="$MAX_NODE_COUNT" + NPD_ENABLED="$NPD_ENABLED" EOF } diff --git a/magnum/drivers/heat/k8s_fedora_template_def.py b/magnum/drivers/heat/k8s_fedora_template_def.py index 5ee86e1d73..4bd399a61d 100644 --- a/magnum/drivers/heat/k8s_fedora_template_def.py +++ b/magnum/drivers/heat/k8s_fedora_template_def.py @@ -141,7 +141,7 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition): 'nginx_ingress_controller_tag', 'auto_healing_enabled', 'auto_scaling_enabled', 'draino_tag', 'autoscaler_tag', - 'min_node_count', 'max_node_count'] + 'min_node_count', 'max_node_count', 'npd_enabled'] for label in label_list: label_value = cluster.labels.get(label) diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml 
b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml index b557117602..3b153812a2 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml @@ -653,6 +653,12 @@ parameters: max batch size when doing rolling upgrade default: 1 + npd_enabled: + type: boolean + description: > + true if the npd service should be launched + default: + true resources: @@ -947,6 +953,7 @@ resources: autoscaler_tag: {get_param: autoscaler_tag} min_node_count: {get_param: min_node_count} max_node_count: {get_param: max_node_count} + npd_enabled: {get_param: npd_enabled} kube_cluster_config: type: OS::Heat::SoftwareConfig @@ -1081,6 +1088,7 @@ resources: octavia_enabled: {get_param: octavia_enabled} heat_container_agent_tag: {get_param: heat_container_agent_tag} auto_healing_enabled: {get_param: auto_healing_enabled} + npd_enabled: {get_param: npd_enabled} outputs: diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml index 2b9ff0b6c5..e4d612e4ea 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml @@ -488,6 +488,13 @@ parameters: description: > maximum node count of cluster workers when doing scale up + npd_enabled: + type: boolean + description: > + true if the npd service should be launched + default: + true + resources: ###################################################################### # @@ -632,6 +639,7 @@ resources: "$AUTOSCALER_TAG": {get_param: autoscaler_tag} "$MIN_NODE_COUNT": {get_param: min_node_count} "$MAX_NODE_COUNT": {get_param: max_node_count} + "$NPD_ENABLED": {get_param: npd_enabled} - get_file: ../../common/templates/kubernetes/fragments/make-cert.sh - get_file: ../../common/templates/kubernetes/fragments/configure-etcd.sh - get_file: 
../../common/templates/kubernetes/fragments/write-kube-os-config.sh diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml index 8c86996841..2dd02e0d27 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml @@ -281,6 +281,13 @@ parameters: description: > true if the auto healing feature should be enabled + npd_enabled: + type: boolean + description: > + true if the npd service should be launched + default: + true + resources: agent_config: @@ -366,6 +373,7 @@ resources: $OCTAVIA_ENABLED: {get_param: octavia_enabled} $HEAT_CONTAINER_AGENT_TAG: {get_param: heat_container_agent_tag} $AUTO_HEALING_ENABLED: {get_param: auto_healing_enabled} + $NPD_ENABLED: {get_param: npd_enabled} - get_file: ../../common/templates/kubernetes/fragments/write-kube-os-config.sh - get_file: ../../common/templates/kubernetes/fragments/make-cert-client.sh - get_file: ../../common/templates/fragments/configure-docker-registry.sh diff --git a/magnum/tests/unit/drivers/test_template_definition.py b/magnum/tests/unit/drivers/test_template_definition.py index 9bbfd3b266..7a9dcdd9f6 100644 --- a/magnum/tests/unit/drivers/test_template_definition.py +++ b/magnum/tests/unit/drivers/test_template_definition.py @@ -530,6 +530,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): autoscaler_tag = mock_cluster.labels.get('autoscaler_tag') min_node_count = mock_cluster.labels.get('min_node_count') max_node_count = mock_cluster.labels.get('max_node_count') + npd_enabled = mock_cluster.labels.get('npd_enabled') master_image = mock_cluster_template.image_id minion_image = mock_cluster_template.image_id @@ -607,6 +608,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'traefik_ingress_controller_tag': traefik_ingress_controller_tag, 'master_image': master_image, 'minion_image': 
minion_image, + 'npd_enabled': npd_enabled, 'kube_version': kube_tag, 'master_kube_tag': kube_tag, 'minion_kube_tag': kube_tag, @@ -938,6 +940,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): autoscaler_tag = mock_cluster.labels.get('autoscaler_tag') min_node_count = mock_cluster.labels.get('min_node_count') max_node_count = mock_cluster.labels.get('max_node_count') + npd_enabled = mock_cluster.labels.get('npd_enabled') master_image = mock_cluster_template.image_id minion_image = mock_cluster_template.image_id @@ -1017,6 +1020,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'traefik_ingress_controller_tag': traefik_ingress_controller_tag, 'master_image': master_image, 'minion_image': minion_image, + 'npd_enabled': npd_enabled, 'kube_version': kube_tag, 'master_kube_tag': kube_tag, 'minion_kube_tag': kube_tag,