Browse Source

Fix prometheus monitoring

There are 2 changes included in this patch:

1. Use the cluster IP instead of a fixed node IP for the Grafana service
to make sure the address is always reachable.

2. Move the node exporter to the prometheus-monitoring namespace and
run it as a DaemonSet so that metrics are also collected from the master node.

Task: 28468
Story: 2004590

Change-Id: I9090c6dc4b38e1a1466c4c3a6a827d95c089fb41
changes/58/624558/5
Feilong Wang 3 years ago
parent
commit
b6936894c4
  1. 32
      magnum/drivers/common/templates/kubernetes/fragments/enable-node-exporter.sh
  2. 89
      magnum/drivers/common/templates/kubernetes/fragments/enable-prometheus-monitoring.sh
  3. 3
      magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml
  4. 7
      magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml
  5. 3
      magnum/drivers/k8s_fedora_ironic_v1/templates/kubecluster.yaml
  6. 7
      magnum/drivers/k8s_fedora_ironic_v1/templates/kubeminion_software_configs.yaml

32
magnum/drivers/common/templates/kubernetes/fragments/enable-node-exporter.sh

@ -1,32 +0,0 @@
#!/bin/sh

# Deploy the Prometheus node-exporter as a static pod on this node.
# Reads PROMETHEUS_MONITORING and CONTAINER_INFRA_PREFIX from the
# Heat-generated parameter file.
. /etc/sysconfig/heat-params

# Nothing to do when monitoring is disabled for the cluster.
if [ "$(echo "$PROMETHEUS_MONITORING" | tr '[:upper:]' '[:lower:]')" = "false" ]; then
    exit 0
fi

# Write node-exporter manifest as a regular pod; kubelet picks up any
# manifest dropped into /etc/kubernetes/manifests as a static pod.
node_exporter_file=/etc/kubernetes/manifests/node-exporter.yaml

# Only write the manifest once; an existing file is left untouched so a
# re-run of this fragment is idempotent.
[ -f "${node_exporter_file}" ] || {
    echo "Writing File: $node_exporter_file"
    mkdir -p "$(dirname "${node_exporter_file}")"
    # NOTE: the heredoc is unquoted on purpose so that
    # ${CONTAINER_INFRA_PREFIX} is expanded at write time.
    cat << EOF > "${node_exporter_file}"
apiVersion: v1
kind: Pod
metadata:
  name: node-exporter
  namespace: kube-system
  annotations:
    prometheus.io/scrape: "true"
  labels:
    app: node-exporter
spec:
  containers:
  - name: node-exporter
    image: ${CONTAINER_INFRA_PREFIX:-docker.io/prom/}node-exporter
    ports:
    - containerPort: 9100
      hostPort: 9100
EOF
}

89
magnum/drivers/common/templates/kubernetes/fragments/enable-prometheus-monitoring.sh

@ -362,6 +362,81 @@ EOF
)
writeFile $grafanaService_file "$grafanaService_content"
# Render the node-exporter DaemonSet manifest. Running it as a DaemonSet
# in the prometheus-monitoring namespace (instead of a static pod per
# minion) ensures metrics are also collected from the master node.
nodeExporter_file=/srv/magnum/kubernetes/monitoring/nodeExporter.yaml
# NOTE: unquoted heredoc so ${CONTAINER_INFRA_PREFIX} expands at render time.
nodeExporter_content=$(cat <<EOF
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: prometheus-monitoring
  labels:
    k8s-app: node-exporter
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v0.15.2
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
      version: v0.15.2
  updateStrategy:
    type: OnDelete
  template:
    metadata:
      labels:
        k8s-app: node-exporter
        version: v0.15.2
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      tolerations:
      # Make sure calico/node gets scheduled on all nodes.
      - effect: NoSchedule
        operator: Exists
      # Mark the pod as a critical add-on for rescheduling.
      - key: CriticalAddonsOnly
        operator: Exists
      - effect: NoExecute
        operator: Exists
      priorityClassName: system-node-critical
      containers:
      - name: prometheus-node-exporter
        image: "${CONTAINER_INFRA_PREFIX:-docker.io/prom/}node-exporter:v0.15.2"
        imagePullPolicy: "IfNotPresent"
        args:
        - --path.procfs=/host/proc
        - --path.sysfs=/host/sys
        ports:
        - name: metrics
          containerPort: 9100
          hostPort: 9100
        volumeMounts:
        - name: proc
          mountPath: /host/proc
          readOnly: true
        - name: sys
          mountPath: /host/sys
          readOnly: true
        resources:
          limits:
            cpu: 10m
            memory: 50Mi
          requests:
            cpu: 10m
            memory: 50Mi
      hostNetwork: true
      hostPID: true
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: sys
        hostPath:
          path: /sys
EOF
)
# writeFile is a helper defined earlier in this script fragment.
writeFile $nodeExporter_file "$nodeExporter_content"
. /etc/sysconfig/heat-params
@ -402,6 +477,13 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/prometheusService.yaml
fi
# Create the node-exporter DaemonSet only if it does not already exist
# and its rendered manifest is present. (The ''' sequences are Heat
# template escaping and must be preserved verbatim.)
if ! kubectl get daemonset node-exporter -n prometheus-monitoring && \
    [ -f "'''${PROMETHEUS_MON_BASE_DIR}'''/nodeExporter.yaml" ]; then
    kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/nodeExporter.yaml
fi
# Check if configmap graf-dash exists
kubectl get configmap graf-dash -n prometheus-monitoring
if [ "$?" != "0" ] && \
@ -429,14 +511,15 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
# Which node is running Grafana
NODE_IP=`kubectl get po -n prometheus-monitoring -o jsonpath={.items[0].status.hostIP} -l name=grafana`
PROM_SERVICE_IP=`kubectl get svc prometheus --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
GRAFANA_SERVICE_IP=`kubectl get svc grafana --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
# The Grafana pod might be running but the app might still be initiating
echo "Check if Grafana is ready..."
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
curl --user admin:$ADMIN_PASSWD -X GET http://$GRAFANA_SERVICE_IP:3000/api/datasources/1
until [ $? -eq 0 ]
do
sleep 2
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
curl --user admin:$ADMIN_PASSWD -X GET http://$GRAFANA_SERVICE_IP:3000/api/datasources/1
done
# Inject Prometheus datasource into Grafana
@ -446,7 +529,7 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
-H "Content-Type: application/json;charset=UTF-8" \
--data-binary '''"'"'''{"name":"k8sPrometheus","isDefault":true,
"type":"prometheus","url":"http://'''"'"'''$PROM_SERVICE_IP'''"'"''':9090","access":"proxy"}'''"'"'''\
"http://$NODE_IP:3000/api/datasources/"`
"http://$GRAFANA_SERVICE_IP:3000/api/datasources/"`
if [[ "$INJECT" = *"Datasource added"* ]]; then
echo "Prometheus datasource injected into Grafana"

3
magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml

@ -584,6 +584,9 @@ resources:
- protocol: tcp
port_range_min: 6443
port_range_max: 6443
- protocol: tcp
port_range_min: 9100
port_range_max: 9100
- protocol: tcp
port_range_min: 10250
port_range_max: 10250

7
magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml

@ -417,12 +417,6 @@ resources:
group: ungrouped
config: {get_file: ../../common/templates/fragments/enable-docker-registry.sh}
enable_node_exporter:
type: OS::Heat::SoftwareConfig
properties:
group: ungrouped
config: {get_file: ../../common/templates/kubernetes/fragments/enable-node-exporter.sh}
minion_wc_notify:
type: OS::Heat::SoftwareConfig
properties:
@ -468,7 +462,6 @@ resources:
- config: {get_resource: flannel_service}
- config: {get_resource: add_proxy}
- config: {get_resource: enable_services}
- config: {get_resource: enable_node_exporter}
- config: {get_resource: enable_docker_registry}
- config: {get_resource: minion_wc_notify}

3
magnum/drivers/k8s_fedora_ironic_v1/templates/kubecluster.yaml

@ -419,6 +419,9 @@ resources:
- protocol: tcp
port_range_min: 6443
port_range_max: 6443
- protocol: tcp
port_range_min: 9100
port_range_max: 9100
- protocol: tcp
port_range_min: 30000
port_range_max: 32767

7
magnum/drivers/k8s_fedora_ironic_v1/templates/kubeminion_software_configs.yaml

@ -301,12 +301,6 @@ resources:
group: ungrouped
config: {get_file: ../../common/templates/kubernetes/fragments/enable-kube-proxy-minion.sh}
enable_node_exporter:
type: OS::Heat::SoftwareConfig
properties:
group: ungrouped
config: {get_file: ../../common/templates/kubernetes/fragments/enable-node-exporter.sh}
minion_wc_notify:
type: OS::Heat::SoftwareConfig
properties:
@ -352,7 +346,6 @@ resources:
- config: {get_resource: add_proxy}
- config: {get_resource: enable_services}
- config: {get_resource: enable_kube_proxy}
- config: {get_resource: enable_node_exporter}
- config: {get_resource: enable_docker_registry}
- config: {get_resource: minion_wc_notify}

Loading…
Cancel
Save