Fix prometheus monitoring
This patch includes two changes: 1. Use the cluster IP instead of a fixed IP for the Grafana service to make sure the address is reachable. 2. Move the node exporter to the prometheus-monitoring namespace and run it as a DaemonSet so that metrics are also collected from the master node. Task: 28468 Story: 2004590 Change-Id: I9090c6dc4b38e1a1466c4c3a6a827d95c089fb41
This commit is contained in:
parent
0bf9ccadcb
commit
b6936894c4
|
@ -1,32 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
. /etc/sysconfig/heat-params
|
|
||||||
|
|
||||||
if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "false" ]; then
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Write node-exporter manifest as a regular pod
|
|
||||||
node_exporter_file=/etc/kubernetes/manifests/node-exporter.yaml
|
|
||||||
[ -f ${node_exporter_file} ] || {
|
|
||||||
echo "Writing File: $node_exporter_file"
|
|
||||||
mkdir -p $(dirname ${node_exporter_file})
|
|
||||||
cat << EOF > ${node_exporter_file}
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Pod
|
|
||||||
metadata:
|
|
||||||
name: node-exporter
|
|
||||||
namespace: kube-system
|
|
||||||
annotations:
|
|
||||||
prometheus.io/scrape: "true"
|
|
||||||
labels:
|
|
||||||
app: node-exporter
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: node-exporter
|
|
||||||
image: ${CONTAINER_INFRA_PREFIX:-docker.io/prom/}node-exporter
|
|
||||||
ports:
|
|
||||||
- containerPort: 9100
|
|
||||||
hostPort: 9100
|
|
||||||
EOF
|
|
||||||
}
|
|
|
@ -362,6 +362,81 @@ EOF
|
||||||
)
|
)
|
||||||
writeFile $grafanaService_file "$grafanaService_content"
|
writeFile $grafanaService_file "$grafanaService_content"
|
||||||
|
|
||||||
|
nodeExporter_file=/srv/magnum/kubernetes/monitoring/nodeExporter.yaml
|
||||||
|
nodeExporter_content=$(cat <<EOF
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: node-exporter
|
||||||
|
namespace: prometheus-monitoring
|
||||||
|
labels:
|
||||||
|
k8s-app: node-exporter
|
||||||
|
kubernetes.io/cluster-service: "true"
|
||||||
|
addonmanager.kubernetes.io/mode: Reconcile
|
||||||
|
version: v0.15.2
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: node-exporter
|
||||||
|
version: v0.15.2
|
||||||
|
updateStrategy:
|
||||||
|
type: OnDelete
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
k8s-app: node-exporter
|
||||||
|
version: v0.15.2
|
||||||
|
annotations:
|
||||||
|
scheduler.alpha.kubernetes.io/critical-pod: ''
|
||||||
|
spec:
|
||||||
|
tolerations:
|
||||||
|
# Make sure node-exporter gets scheduled on all nodes.
|
||||||
|
- effect: NoSchedule
|
||||||
|
operator: Exists
|
||||||
|
# Mark the pod as a critical add-on for rescheduling.
|
||||||
|
- key: CriticalAddonsOnly
|
||||||
|
operator: Exists
|
||||||
|
- effect: NoExecute
|
||||||
|
operator: Exists
|
||||||
|
priorityClassName: system-node-critical
|
||||||
|
containers:
|
||||||
|
- name: prometheus-node-exporter
|
||||||
|
image: "${CONTAINER_INFRA_PREFIX:-docker.io/prom/}node-exporter:v0.15.2"
|
||||||
|
imagePullPolicy: "IfNotPresent"
|
||||||
|
args:
|
||||||
|
- --path.procfs=/host/proc
|
||||||
|
- --path.sysfs=/host/sys
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 9100
|
||||||
|
hostPort: 9100
|
||||||
|
volumeMounts:
|
||||||
|
- name: proc
|
||||||
|
mountPath: /host/proc
|
||||||
|
readOnly: true
|
||||||
|
- name: sys
|
||||||
|
mountPath: /host/sys
|
||||||
|
readOnly: true
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 50Mi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 50Mi
|
||||||
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
|
volumes:
|
||||||
|
- name: proc
|
||||||
|
hostPath:
|
||||||
|
path: /proc
|
||||||
|
- name: sys
|
||||||
|
hostPath:
|
||||||
|
path: /sys
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
writeFile $nodeExporter_file "$nodeExporter_content"
|
||||||
|
|
||||||
. /etc/sysconfig/heat-params
|
. /etc/sysconfig/heat-params
|
||||||
|
|
||||||
|
|
||||||
|
@ -402,6 +477,13 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
|
||||||
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/prometheusService.yaml
|
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/prometheusService.yaml
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check if the node exporter DaemonSet exists
|
||||||
|
kubectl get daemonset node-exporter -n prometheus-monitoring
|
||||||
|
if [ "$?" != "0" ] && \
|
||||||
|
[ -f "'''${PROMETHEUS_MON_BASE_DIR}'''/nodeExporter.yaml" ]; then
|
||||||
|
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/nodeExporter.yaml
|
||||||
|
fi
|
||||||
|
|
||||||
# Check if configmap graf-dash exists
|
# Check if configmap graf-dash exists
|
||||||
kubectl get configmap graf-dash -n prometheus-monitoring
|
kubectl get configmap graf-dash -n prometheus-monitoring
|
||||||
if [ "$?" != "0" ] && \
|
if [ "$?" != "0" ] && \
|
||||||
|
@ -429,14 +511,15 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
|
||||||
# Which node is running Grafana
|
# Which node is running Grafana
|
||||||
NODE_IP=`kubectl get po -n prometheus-monitoring -o jsonpath={.items[0].status.hostIP} -l name=grafana`
|
NODE_IP=`kubectl get po -n prometheus-monitoring -o jsonpath={.items[0].status.hostIP} -l name=grafana`
|
||||||
PROM_SERVICE_IP=`kubectl get svc prometheus --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
|
PROM_SERVICE_IP=`kubectl get svc prometheus --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
|
||||||
|
GRAFANA_SERVICE_IP=`kubectl get svc grafana --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
|
||||||
|
|
||||||
# The Grafana pod might be running but the app might still be initializing
|
# The Grafana pod might be running but the app might still be initializing
|
||||||
echo "Check if Grafana is ready..."
|
echo "Check if Grafana is ready..."
|
||||||
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
|
curl --user admin:$ADMIN_PASSWD -X GET http://$GRAFANA_SERVICE_IP:3000/api/datasources/1
|
||||||
until [ $? -eq 0 ]
|
until [ $? -eq 0 ]
|
||||||
do
|
do
|
||||||
sleep 2
|
sleep 2
|
||||||
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
|
curl --user admin:$ADMIN_PASSWD -X GET http://$GRAFANA_SERVICE_IP:3000/api/datasources/1
|
||||||
done
|
done
|
||||||
|
|
||||||
# Inject Prometheus datasource into Grafana
|
# Inject Prometheus datasource into Grafana
|
||||||
|
@ -446,7 +529,7 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
|
||||||
-H "Content-Type: application/json;charset=UTF-8" \
|
-H "Content-Type: application/json;charset=UTF-8" \
|
||||||
--data-binary '''"'"'''{"name":"k8sPrometheus","isDefault":true,
|
--data-binary '''"'"'''{"name":"k8sPrometheus","isDefault":true,
|
||||||
"type":"prometheus","url":"http://'''"'"'''$PROM_SERVICE_IP'''"'"''':9090","access":"proxy"}'''"'"'''\
|
"type":"prometheus","url":"http://'''"'"'''$PROM_SERVICE_IP'''"'"''':9090","access":"proxy"}'''"'"'''\
|
||||||
"http://$NODE_IP:3000/api/datasources/"`
|
"http://$GRAFANA_SERVICE_IP:3000/api/datasources/"`
|
||||||
|
|
||||||
if [[ "$INJECT" = *"Datasource added"* ]]; then
|
if [[ "$INJECT" = *"Datasource added"* ]]; then
|
||||||
echo "Prometheus datasource injected into Grafana"
|
echo "Prometheus datasource injected into Grafana"
|
||||||
|
|
|
@ -584,6 +584,9 @@ resources:
|
||||||
- protocol: tcp
|
- protocol: tcp
|
||||||
port_range_min: 6443
|
port_range_min: 6443
|
||||||
port_range_max: 6443
|
port_range_max: 6443
|
||||||
|
- protocol: tcp
|
||||||
|
port_range_min: 9100
|
||||||
|
port_range_max: 9100
|
||||||
- protocol: tcp
|
- protocol: tcp
|
||||||
port_range_min: 10250
|
port_range_min: 10250
|
||||||
port_range_max: 10250
|
port_range_max: 10250
|
||||||
|
|
|
@ -417,12 +417,6 @@ resources:
|
||||||
group: ungrouped
|
group: ungrouped
|
||||||
config: {get_file: ../../common/templates/fragments/enable-docker-registry.sh}
|
config: {get_file: ../../common/templates/fragments/enable-docker-registry.sh}
|
||||||
|
|
||||||
enable_node_exporter:
|
|
||||||
type: OS::Heat::SoftwareConfig
|
|
||||||
properties:
|
|
||||||
group: ungrouped
|
|
||||||
config: {get_file: ../../common/templates/kubernetes/fragments/enable-node-exporter.sh}
|
|
||||||
|
|
||||||
minion_wc_notify:
|
minion_wc_notify:
|
||||||
type: OS::Heat::SoftwareConfig
|
type: OS::Heat::SoftwareConfig
|
||||||
properties:
|
properties:
|
||||||
|
@ -468,7 +462,6 @@ resources:
|
||||||
- config: {get_resource: flannel_service}
|
- config: {get_resource: flannel_service}
|
||||||
- config: {get_resource: add_proxy}
|
- config: {get_resource: add_proxy}
|
||||||
- config: {get_resource: enable_services}
|
- config: {get_resource: enable_services}
|
||||||
- config: {get_resource: enable_node_exporter}
|
|
||||||
- config: {get_resource: enable_docker_registry}
|
- config: {get_resource: enable_docker_registry}
|
||||||
- config: {get_resource: minion_wc_notify}
|
- config: {get_resource: minion_wc_notify}
|
||||||
|
|
||||||
|
|
|
@ -419,6 +419,9 @@ resources:
|
||||||
- protocol: tcp
|
- protocol: tcp
|
||||||
port_range_min: 6443
|
port_range_min: 6443
|
||||||
port_range_max: 6443
|
port_range_max: 6443
|
||||||
|
- protocol: tcp
|
||||||
|
port_range_min: 9100
|
||||||
|
port_range_max: 9100
|
||||||
- protocol: tcp
|
- protocol: tcp
|
||||||
port_range_min: 30000
|
port_range_min: 30000
|
||||||
port_range_max: 32767
|
port_range_max: 32767
|
||||||
|
|
|
@ -301,12 +301,6 @@ resources:
|
||||||
group: ungrouped
|
group: ungrouped
|
||||||
config: {get_file: ../../common/templates/kubernetes/fragments/enable-kube-proxy-minion.sh}
|
config: {get_file: ../../common/templates/kubernetes/fragments/enable-kube-proxy-minion.sh}
|
||||||
|
|
||||||
enable_node_exporter:
|
|
||||||
type: OS::Heat::SoftwareConfig
|
|
||||||
properties:
|
|
||||||
group: ungrouped
|
|
||||||
config: {get_file: ../../common/templates/kubernetes/fragments/enable-node-exporter.sh}
|
|
||||||
|
|
||||||
minion_wc_notify:
|
minion_wc_notify:
|
||||||
type: OS::Heat::SoftwareConfig
|
type: OS::Heat::SoftwareConfig
|
||||||
properties:
|
properties:
|
||||||
|
@ -352,7 +346,6 @@ resources:
|
||||||
- config: {get_resource: add_proxy}
|
- config: {get_resource: add_proxy}
|
||||||
- config: {get_resource: enable_services}
|
- config: {get_resource: enable_services}
|
||||||
- config: {get_resource: enable_kube_proxy}
|
- config: {get_resource: enable_kube_proxy}
|
||||||
- config: {get_resource: enable_node_exporter}
|
|
||||||
- config: {get_resource: enable_docker_registry}
|
- config: {get_resource: enable_docker_registry}
|
||||||
- config: {get_resource: minion_wc_notify}
|
- config: {get_resource: minion_wc_notify}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue