Fix prometheus monitoring
This patch includes two changes: 1. Use the cluster IP instead of a fixed IP for the Grafana service to make sure the address is reachable. 2. Move the node exporter to the prometheus-monitoring namespace and run it as a DaemonSet to collect metrics from the master node. Task: 28468 Story: 2004590 Change-Id: I9090c6dc4b38e1a1466c4c3a6a827d95c089fb41
This commit is contained in:
parent
0bf9ccadcb
commit
b6936894c4
|
@ -1,32 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
. /etc/sysconfig/heat-params
|
||||
|
||||
if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "false" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Write node-exporter manifest as a regular pod
|
||||
node_exporter_file=/etc/kubernetes/manifests/node-exporter.yaml
|
||||
[ -f ${node_exporter_file} ] || {
|
||||
echo "Writing File: $node_exporter_file"
|
||||
mkdir -p $(dirname ${node_exporter_file})
|
||||
cat << EOF > ${node_exporter_file}
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: node-exporter
|
||||
namespace: kube-system
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
labels:
|
||||
app: node-exporter
|
||||
spec:
|
||||
containers:
|
||||
- name: node-exporter
|
||||
image: ${CONTAINER_INFRA_PREFIX:-docker.io/prom/}node-exporter
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
hostPort: 9100
|
||||
EOF
|
||||
}
|
|
@ -362,6 +362,81 @@ EOF
|
|||
)
|
||||
writeFile $grafanaService_file "$grafanaService_content"
|
||||
|
||||
nodeExporter_file=/srv/magnum/kubernetes/monitoring/nodeExporter.yaml
|
||||
nodeExporter_content=$(cat <<EOF
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: node-exporter
|
||||
namespace: prometheus-monitoring
|
||||
labels:
|
||||
k8s-app: node-exporter
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
version: v0.15.2
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: node-exporter
|
||||
version: v0.15.2
|
||||
updateStrategy:
|
||||
type: OnDelete
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: node-exporter
|
||||
version: v0.15.2
|
||||
annotations:
|
||||
scheduler.alpha.kubernetes.io/critical-pod: ''
|
||||
spec:
|
||||
tolerations:
|
||||
# Make sure node-exporter gets scheduled on all nodes.
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
# Mark the pod as a critical add-on for rescheduling.
|
||||
- key: CriticalAddonsOnly
|
||||
operator: Exists
|
||||
- effect: NoExecute
|
||||
operator: Exists
|
||||
priorityClassName: system-node-critical
|
||||
containers:
|
||||
- name: prometheus-node-exporter
|
||||
image: "${CONTAINER_INFRA_PREFIX:-docker.io/prom/}node-exporter:v0.15.2"
|
||||
imagePullPolicy: "IfNotPresent"
|
||||
args:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 9100
|
||||
hostPort: 9100
|
||||
volumeMounts:
|
||||
- name: proc
|
||||
mountPath: /host/proc
|
||||
readOnly: true
|
||||
- name: sys
|
||||
mountPath: /host/sys
|
||||
readOnly: true
|
||||
resources:
|
||||
limits:
|
||||
cpu: 10m
|
||||
memory: 50Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 50Mi
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
volumes:
|
||||
- name: proc
|
||||
hostPath:
|
||||
path: /proc
|
||||
- name: sys
|
||||
hostPath:
|
||||
path: /sys
|
||||
EOF
|
||||
)
|
||||
writeFile $nodeExporter_file "$nodeExporter_content"
|
||||
|
||||
. /etc/sysconfig/heat-params
|
||||
|
||||
|
||||
|
@ -402,6 +477,13 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
|
|||
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/prometheusService.yaml
|
||||
fi
|
||||
|
||||
# Check if daemonset node-exporter exists
|
||||
kubectl get daemonset node-exporter -n prometheus-monitoring
|
||||
if [ "$?" != "0" ] && \
|
||||
[ -f "'''${PROMETHEUS_MON_BASE_DIR}'''/nodeExporter.yaml" ]; then
|
||||
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/nodeExporter.yaml
|
||||
fi
|
||||
|
||||
# Check if configmap graf-dash exists
|
||||
kubectl get configmap graf-dash -n prometheus-monitoring
|
||||
if [ "$?" != "0" ] && \
|
||||
|
@ -429,14 +511,15 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
|
|||
# Which node is running Grafana
|
||||
NODE_IP=`kubectl get po -n prometheus-monitoring -o jsonpath={.items[0].status.hostIP} -l name=grafana`
|
||||
PROM_SERVICE_IP=`kubectl get svc prometheus --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
|
||||
GRAFANA_SERVICE_IP=`kubectl get svc grafana --namespace prometheus-monitoring -o jsonpath={..clusterIP}`
|
||||
|
||||
# The Grafana pod might be running but the app might still be initializing
|
||||
echo "Check if Grafana is ready..."
|
||||
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
|
||||
curl --user admin:$ADMIN_PASSWD -X GET http://$GRAFANA_SERVICE_IP:3000/api/datasources/1
|
||||
until [ $? -eq 0 ]
|
||||
do
|
||||
sleep 2
|
||||
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
|
||||
curl --user admin:$ADMIN_PASSWD -X GET http://$GRAFANA_SERVICE_IP:3000/api/datasources/1
|
||||
done
|
||||
|
||||
# Inject Prometheus datasource into Grafana
|
||||
|
@ -446,7 +529,7 @@ if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "true" ]; t
|
|||
-H "Content-Type: application/json;charset=UTF-8" \
|
||||
--data-binary '''"'"'''{"name":"k8sPrometheus","isDefault":true,
|
||||
"type":"prometheus","url":"http://'''"'"'''$PROM_SERVICE_IP'''"'"''':9090","access":"proxy"}'''"'"'''\
|
||||
"http://$NODE_IP:3000/api/datasources/"`
|
||||
"http://$GRAFANA_SERVICE_IP:3000/api/datasources/"`
|
||||
|
||||
if [[ "$INJECT" = *"Datasource added"* ]]; then
|
||||
echo "Prometheus datasource injected into Grafana"
|
||||
|
|
|
@ -584,6 +584,9 @@ resources:
|
|||
- protocol: tcp
|
||||
port_range_min: 6443
|
||||
port_range_max: 6443
|
||||
- protocol: tcp
|
||||
port_range_min: 9100
|
||||
port_range_max: 9100
|
||||
- protocol: tcp
|
||||
port_range_min: 10250
|
||||
port_range_max: 10250
|
||||
|
|
|
@ -417,12 +417,6 @@ resources:
|
|||
group: ungrouped
|
||||
config: {get_file: ../../common/templates/fragments/enable-docker-registry.sh}
|
||||
|
||||
enable_node_exporter:
|
||||
type: OS::Heat::SoftwareConfig
|
||||
properties:
|
||||
group: ungrouped
|
||||
config: {get_file: ../../common/templates/kubernetes/fragments/enable-node-exporter.sh}
|
||||
|
||||
minion_wc_notify:
|
||||
type: OS::Heat::SoftwareConfig
|
||||
properties:
|
||||
|
@ -468,7 +462,6 @@ resources:
|
|||
- config: {get_resource: flannel_service}
|
||||
- config: {get_resource: add_proxy}
|
||||
- config: {get_resource: enable_services}
|
||||
- config: {get_resource: enable_node_exporter}
|
||||
- config: {get_resource: enable_docker_registry}
|
||||
- config: {get_resource: minion_wc_notify}
|
||||
|
||||
|
|
|
@ -419,6 +419,9 @@ resources:
|
|||
- protocol: tcp
|
||||
port_range_min: 6443
|
||||
port_range_max: 6443
|
||||
- protocol: tcp
|
||||
port_range_min: 9100
|
||||
port_range_max: 9100
|
||||
- protocol: tcp
|
||||
port_range_min: 30000
|
||||
port_range_max: 32767
|
||||
|
|
|
@ -301,12 +301,6 @@ resources:
|
|||
group: ungrouped
|
||||
config: {get_file: ../../common/templates/kubernetes/fragments/enable-kube-proxy-minion.sh}
|
||||
|
||||
enable_node_exporter:
|
||||
type: OS::Heat::SoftwareConfig
|
||||
properties:
|
||||
group: ungrouped
|
||||
config: {get_file: ../../common/templates/kubernetes/fragments/enable-node-exporter.sh}
|
||||
|
||||
minion_wc_notify:
|
||||
type: OS::Heat::SoftwareConfig
|
||||
properties:
|
||||
|
@ -352,7 +346,6 @@ resources:
|
|||
- config: {get_resource: add_proxy}
|
||||
- config: {get_resource: enable_services}
|
||||
- config: {get_resource: enable_kube_proxy}
|
||||
- config: {get_resource: enable_node_exporter}
|
||||
- config: {get_resource: enable_docker_registry}
|
||||
- config: {get_resource: minion_wc_notify}
|
||||
|
||||
|
|
Loading…
Reference in New Issue