Browse Source

Merge "[fedora-atomic-k8s] Adding Node Problem Detector"

changes/78/631378/14
Zuul 2 years ago
committed by Gerrit Code Review
parent
commit
e6f4969539
  1. 117
      magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh
  2. 1
      magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.yaml
  3. 3
      magnum/drivers/heat/k8s_fedora_template_def.py
  4. 7
      magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml
  5. 5
      magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml
  6. 4
      magnum/tests/unit/drivers/test_template_definition.py

117
magnum/drivers/common/templates/kubernetes/fragments/enable-auto-healing.sh

@ -0,0 +1,117 @@
#!/bin/sh
#
# enable-auto-healing: deploy the Node Problem Detector (NPD) add-on.
#
# Steps:
#   1. Source the Heat-provided parameters (NODE_PROBLEM_DETECTOR_TAG and,
#      optionally, CONTAINER_INFRA_PREFIX).
#   2. Write the NPD manifest (ServiceAccount, ClusterRoleBinding, DaemonSet)
#      to ${NPD_DEPLOY} unless that file already exists.
#   3. Wait until the local API endpoint answers "ok" on /healthz.
#   4. Apply the manifest with kubectl.

step="enable-auto-healing"
printf 'Starting to run %s\n' "${step}"

. /etc/sysconfig/heat-params

# Image registry prefix; falls back to the public registry when
# CONTAINER_INFRA_PREFIX is unset or empty.
_gcr_prefix=${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}

# Generate Node Problem Detector manifest file
NPD_DEPLOY=/srv/magnum/kubernetes/manifests/npd.yaml

# The manifest is only generated once: an existing file is left untouched, so
# a changed NODE_PROBLEM_DETECTOR_TAG does not rewrite it.
# NOTE(review): the version label is also part of spec.selector; apps/v1
# selectors are immutable, so re-applying with a different tag would
# presumably be rejected -- confirm before relying on in-place tag changes.
[ -f "${NPD_DEPLOY}" ] || {
    echo "Writing File: $NPD_DEPLOY"
    mkdir -p "$(dirname "${NPD_DEPLOY}")"
    # Unquoted delimiter on purpose: ${_gcr_prefix} and
    # ${NODE_PROBLEM_DETECTOR_TAG} must be expanded into the manifest.
    cat << EOF > "${NPD_DEPLOY}"
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-problem-detector
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: npd-binding
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:node-problem-detector
subjects:
- kind: ServiceAccount
  name: node-problem-detector
  namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: npd
  namespace: kube-system
  labels:
    k8s-app: node-problem-detector
    version: ${NODE_PROBLEM_DETECTOR_TAG}
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  selector:
    matchLabels:
      k8s-app: node-problem-detector
      version: ${NODE_PROBLEM_DETECTOR_TAG}
  template:
    metadata:
      labels:
        k8s-app: node-problem-detector
        version: ${NODE_PROBLEM_DETECTOR_TAG}
        kubernetes.io/cluster-service: "true"
    spec:
      containers:
      - name: node-problem-detector
        image: ${_gcr_prefix}node-problem-detector:${NODE_PROBLEM_DETECTOR_TAG}
        command:
        - "/bin/sh"
        - "-c"
        # Pass both config to support both journald and syslog.
        - "exec /node-problem-detector --logtostderr --system-log-monitors=/config/kernel-monitor.json,/config/kernel-monitor-filelog.json,/config/docker-monitor.json,/config/docker-monitor-filelog.json >>/var/log/node-problem-detector.log 2>&1"
        securityContext:
          privileged: true
        resources:
          limits:
            cpu: "200m"
            memory: "100Mi"
          requests:
            cpu: "20m"
            memory: "20Mi"
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        volumeMounts:
        - name: log
          mountPath: /var/log
        - name: localtime
          mountPath: /etc/localtime
          readOnly: true
      volumes:
      - name: log
        hostPath:
          path: /var/log/
      - name: localtime
        hostPath:
          path: /etc/localtime
          type: "FileOrCreate"
      serviceAccountName: node-problem-detector
      tolerations:
      - operator: "Exists"
        effect: "NoExecute"
      - key: "CriticalAddonsOnly"
        operator: "Exists"
EOF
}

# Block until the API endpoint on localhost:8080 reports healthy; there is no
# timeout, so this polls every 5 seconds for as long as it takes.
echo "Waiting for Kubernetes API..."
until [ "ok" = "$(curl --silent http://127.0.0.1:8080/healthz)" ]
do
    sleep 5
done

kubectl apply -f "${NPD_DEPLOY}"

printf 'Finished running %s\n' "${step}"

1
magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.yaml

@ -87,3 +87,4 @@ write_files:
TILLER_ENABLED="$TILLER_ENABLED"
TILLER_TAG="$TILLER_TAG"
TILLER_NAMESPACE="$TILLER_NAMESPACE"
NODE_PROBLEM_DETECTOR_TAG="$NODE_PROBLEM_DETECTOR_TAG"

3
magnum/drivers/heat/k8s_fedora_template_def.py

@ -118,7 +118,8 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
'keystone_auth_enabled', 'k8s_keystone_auth_tag',
'tiller_enabled',
'tiller_tag',
'tiller_namespace']
'tiller_namespace',
'node_problem_detector_tag']
for label in label_list:
label_value = cluster.labels.get(label)

7
magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml

@ -560,6 +560,11 @@ parameters:
description: namespace where tiller will be installed.
default: "magnum-tiller"
node_problem_detector_tag:
type: string
description: tag of the node problem detector container
default: v0.6.2
resources:
######################################################################
@ -824,6 +829,7 @@ resources:
tiller_enabled: {get_param: tiller_enabled}
tiller_tag: {get_param: tiller_tag}
tiller_namespace: {get_param: tiller_namespace}
node_problem_detector_tag: {get_param: node_problem_detector_tag}
kube_cluster_config:
type: OS::Heat::SoftwareConfig
@ -855,6 +861,7 @@ resources:
template: {get_file: ../../common/templates/kubernetes/fragments/enable-ingress-controller.sh}
- get_file: ../../common/templates/kubernetes/fragments/kube-dashboard-service.sh
- get_file: ../../common/templates/kubernetes/fragments/enable-keystone-auth.sh
- get_file: ../../common/templates/kubernetes/fragments/enable-auto-healing.sh
kube_cluster_deploy:
type: OS::Heat::SoftwareDeployment

5
magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml

@ -434,6 +434,10 @@ parameters:
type: string
description: namespace where tiller will be installed
node_problem_detector_tag:
type: string
description: tag of the node problem detector container
resources:
######################################################################
#
@ -544,6 +548,7 @@ resources:
"$TILLER_ENABLED": {get_param: tiller_enabled}
"$TILLER_TAG": {get_param: tiller_tag}
"$TILLER_NAMESPACE": {get_param: tiller_namespace}
"$NODE_PROBLEM_DETECTOR_TAG": {get_param: node_problem_detector_tag}
install_openstack_ca:
type: OS::Heat::SoftwareConfig

4
magnum/tests/unit/drivers/test_template_definition.py

@ -425,6 +425,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
'tiller_tag')
tiller_namespace = mock_cluster.labels.get(
'tiller_namespace')
npd_tag = mock_cluster.labels.get('node_problem_detector_tag')
k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition()
@ -486,6 +487,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
'tiller_enabled': tiller_enabled,
'tiller_tag': tiller_tag,
'tiller_namespace': tiller_namespace,
'node_problem_detector_tag': npd_tag,
}}
mock_get_params.assert_called_once_with(mock_context,
mock_cluster_template,
@ -793,6 +795,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
'tiller_tag')
tiller_namespace = mock_cluster.labels.get(
'tiller_namespace')
npd_tag = mock_cluster.labels.get('node_problem_detector_tag')
k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition()
@ -856,6 +859,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
'tiller_enabled': tiller_enabled,
'tiller_tag': tiller_tag,
'tiller_namespace': tiller_namespace,
'node_problem_detector_tag': npd_tag,
}}
mock_get_params.assert_called_once_with(mock_context,
mock_cluster_template,

Loading…
Cancel
Save