Prometheus monitoring for OSH infra

This will move prometheus to OSH-infra to be included as part of
the basic infrastructure deploy for openstack-helm. It includes
charts for Prometheus, Node Exporter, Kube-State-Metrics, and
Alertmanager. It provides a base for monitoring and alerting
for the underlying infrastructure

Partially Implements: blueprint osh-monitoring

Change-Id: Ie453373b54c5f1825339ce0566e4b5d0f74abc20
This commit is contained in:
Steve Wilkerson 2017-11-03 10:59:08 -05:00
parent 13c4199742
commit 429a4edd86
59 changed files with 3640 additions and 0 deletions

24
alertmanager/Chart.yaml Normal file
View File

@ -0,0 +1,24 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
description: OpenStack-Helm Alertmanager
name: alertmanager
version: 0.1.0
home: https://prometheus.io/docs/alerting/alertmanager/
sources:
- https://github.com/prometheus/alertmanager
- https://git.openstack.org/cgit/openstack/openstack-helm-infra
maintainers:
- name: OpenStack-Helm Authors

View File

@ -0,0 +1,18 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dependencies:
- name: helm-toolkit
repository: http://localhost:8879/charts
version: 0.1.0

View File

@ -0,0 +1,32 @@
#!/bin/sh
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex
COMMAND="${@:-start}"
function start () {
exec /bin/alertmanager \
-config.file=/etc/config/alertmanager.yml \
-storage.path=/var/lib/alertmanager/data
}
function stop () {
kill -TERM 1
}
$COMMAND

View File

@ -0,0 +1,31 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.clusterrolebinding }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: run-alertmanager
subjects:
- kind: ServiceAccount
name: alertmanager
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: cluster-admin
apiGroup: rbac.authorization.k8s.io
{{- end }}

View File

@ -0,0 +1,29 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_bin }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: alertmanager-bin
data:
alertmanager.sh: |
{{ tuple "bin/_alertmanager.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
image-repo-sync.sh: |+
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
{{- end }}

View File

@ -0,0 +1,27 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_etc }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: alertmanager-etc
data:
alertmanager.yml:
{{- toYaml .Values.conf.alertmanager | indent 4 }}
{{- end }}

View File

@ -0,0 +1,60 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.ingress }}
{{- $envAll := . }}
{{- if .Values.network.alertmanager.ingress.public }}
{{- $backendServiceType := "alerts" }}
{{- $backendPort := "alerts-api" }}
{{- $ingressName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
{{- $backendName := tuple $backendServiceType "internal" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
{{- $hostName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
{{- $hostNameNamespaced := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_namespaced_endpoint_lookup" }}
{{- $hostNameFull := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: {{ $ingressName }}
annotations:
kubernetes.io/ingress.class: "nginx"
ingress.kubernetes.io/rewrite-target: /
ingress.kubernetes.io/proxy-body-size: {{ .Values.network.alertmanager.ingress.proxy_body_size }}
spec:
rules:
{{ if ne $hostNameNamespaced $hostNameFull }}
{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced $hostNameFull }}
- host: {{ $vHost }}
http:
paths:
- path: /
backend:
serviceName: {{ $backendName }}
servicePort: {{ $backendPort }}
{{- end }}
{{- else }}
{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced }}
- host: {{ $vHost }}
http:
paths:
- path: /
backend:
serviceName: {{ $backendName }}
servicePort: {{ $backendPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,65 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.job_image_repo_sync }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}}
---
apiVersion: batch/v1
kind: Job
metadata:
name: alertmanager-image-repo-sync
spec:
template:
metadata:
labels:
{{ tuple $envAll "alertmanager" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: image-repo-sync
{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
- name: LOCAL_REPO
value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
- name: IMAGE_SYNC_LIST
value: "{{ include "helm-toolkit.utils.image_sync_list" . }}"
command:
- /tmp/image-repo-sync.sh
volumeMounts:
- name: alertmanager-bin
mountPath: /tmp/image-repo-sync.sh
subPath: image-repo-sync.sh
readOnly: true
- name: docker-socket
mountPath: /var/run/docker.sock
volumes:
- name: alertmanager-bin
configMap:
name: alertmanager-bin
defaultMode: 0555
- name: docker-socket
hostPath:
path: /var/run/docker.sock
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,31 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.pvc }}
{{- $envAll := . }}
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: {{ .Values.storage.pvc.name }}
spec:
accessModes:
- {{ .Values.storage.pvc.access_mode }}
resources:
requests:
storage: {{ .Values.storage.requests.storage }}
storageClassName: {{ .Values.storage.storage_class }}
{{- end }}

View File

@ -0,0 +1,20 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.rbac_entrypoint }}
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}}
{{- end }}

View File

@ -0,0 +1,32 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service_ingress }}
{{- $envAll := . }}
{{- if .Values.network.alertmanager.ingress.public }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ tuple "alerts" "public" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
spec:
ports:
- name: http
port: 80
selector:
app: ingress-api
{{- end }}
{{- end }}

View File

@ -0,0 +1,36 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service }}
{{- $envAll := . }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ tuple "alerts" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
spec:
ports:
- name: alerts-api
{{ if .Values.network.alertmanager.node_port.enabled }}
nodePort: {{ .Values.network.alertmanager.node_port.port }}
{{ end }}
port: {{ .Values.network.alertmanager.port }}
selector:
{{ tuple $envAll "alertmanager" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
{{ if .Values.network.alertmanager.node_port.enabled }}
type: NodePort
{{ end }}
{{- end }}

View File

@ -0,0 +1,22 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.serviceaccount }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: alertmanager
{{- end }}

View File

@ -0,0 +1,106 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.statefulset }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.alertmanager .Values.conditional_dependencies.local_image_registry) -}}
{{- else -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.alertmanager -}}
{{- end -}}
{{- $mounts_alertmanager := .Values.pod.mounts.alertmanager.alertmanager }}
{{- $mounts_alertmanager_init := .Values.pod.mounts.alertmanager.init_container }}
---
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
name: alertmanager
spec:
serviceName: {{ tuple "alerts" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
replicas: {{ .Values.pod.replicas.alertmanager }}
template:
metadata:
labels:
{{ tuple $envAll "alertmanager" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
annotations:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
spec:
serviceAccount: alertmanager
affinity:
{{ tuple $envAll "alertmanager" "server" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.alertmanager.timeout | default "30" }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: alertmanager
{{ tuple $envAll "alertmanager" | include "helm-toolkit.snippets.image" | indent 10 }}
command:
- /tmp/alertmanager.sh
- start
lifecycle:
preStop:
exec:
command:
- /tmp/alertmanager.sh
- stop
{{ tuple $envAll $envAll.Values.pod.resources.alertmanager | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
ports:
- name: alerts-api
containerPort: {{ .Values.network.alertmanager.port }}
readinessProbe:
httpGet:
path: /#/status
port: {{ .Values.network.alertmanager.port }}
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- name: etc-alertmanager
mountPath: /etc/config
- name: alertmanager-etc
mountPath: /etc/config/alertmanager.yml
subPath: alertmanager.yml
readOnly: true
- name: alertmanager-bin
mountPath: /tmp/alertmanager.sh
subPath: alertmanager.sh
readOnly: true
- name: storage
mountPath: /var/lib/alertmanager/data
{{ if $mounts_alertmanager.volumeMounts }}{{ toYaml $mounts_alertmanager.volumeMounts | indent 12 }}{{ end }}
volumes:
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
- name: etc-alertmanager
emptyDir: {}
- name: alertmanager-etc
configMap:
name: alertmanager-etc
- name: alertmanager-bin
configMap:
name: alertmanager-bin
defaultMode: 0555
{{- if .Values.storage.enabled }}
- name: storage
persistentVolumeClaim:
claimName: {{ .Values.storage.pvc.name }}
{{- else }}
- name: storage
emptyDir: {}
{{- end }}
{{ if $mounts_alertmanager.volumes }}{{ toYaml $mounts_alertmanager.volumes | indent 8 }}{{ end }}
{{- end }}

254
alertmanager/values.yaml Normal file
View File

@ -0,0 +1,254 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for alertmanager.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
images:
tags:
alertmanager: docker.io/prom/alertmanager:v0.11.0
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
mounts:
alertmanager:
alertmanager:
init_container: null
replicas:
alertmanager: 1
lifecycle:
upgrades:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
alertmanager:
timeout: 30
resources:
enabled: false
alertmanager:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
alerts:
name: alertmanager
namespace: null
hosts:
default: alerts-api
public: alertmanager
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9093
public: 80
dependencies:
alertmanager:
services: null
image_repo_sync:
services:
- service: local_image_registry
endpoint: internal
conditional_dependencies:
local_image_registry:
jobs:
- alertmanager-image-repo-sync
services:
- service: local_image_registry
endpoint: node
network:
alertmanager:
ingress:
public: true
proxy_body_size: 1024M
node_port:
enabled: false
port: 30903
port: 9093
storage:
enabled: true
pvc:
name: alertmanager-pvc
access_mode: ReadWriteMany
requests:
storage: 5Gi
storage_class: general
manifests:
clusterrolebinding: true
configmap_bin: true
configmap_etc: true
ingress: true
job_image_repo_sync: true
pvc: true
rbac_entrypoint: true
service: true
service_ingress: true
serviceaccount: true
statefulset: true
conf:
alertmanager: |
global:
# The smarthost and SMTP sender used for mail notifications.
smtp_smarthost: 'localhost:25'
smtp_from: 'alertmanager@example.org'
smtp_auth_username: 'alertmanager'
smtp_auth_password: 'password'
# The auth token for Hipchat.
hipchat_auth_token: '1234556789'
# Alternative host for Hipchat.
hipchat_api_url: 'https://hipchat.foobar.org/'
# The directory from which notification templates are read.
templates:
- '/etc/alertmanager/template/*.tmpl'
# The root route on which each incoming alert enters.
route:
# The labels by which incoming alerts are grouped together. For example,
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
group_by: ['alertname', 'cluster', 'service']
# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
# This way ensures that you get multiple alerts for the same group that start
# firing shortly after another are batched together on the first
# notification.
group_wait: 30s
# When the first notification was sent, wait 'group_interval' to send a batch
# of new alerts that started firing for that group.
group_interval: 5m
# If an alert has successfully been sent, wait 'repeat_interval' to
# resend them.
repeat_interval: 3h
# A default receiver
receiver: team-X-mails
# All the above attributes are inherited by all child routes and can
# overwritten on each.
# The child route trees.
routes:
# This routes performs a regular expression match on alert labels to
# catch alerts that are related to a list of services.
- match_re:
service: ^(foo1|foo2|baz)$
receiver: team-X-mails
# The service has a sub-route for critical alerts, any alerts
# that do not match, i.e. severity != critical, fall-back to the
# parent node and are sent to 'team-X-mails'
routes:
- match:
severity: critical
receiver: team-X-pager
- match:
service: files
receiver: team-Y-mails
routes:
- match:
severity: critical
receiver: team-Y-pager
# This route handles all alerts coming from a database service. If there's
# no team to handle it, it defaults to the DB team.
- match:
service: database
receiver: team-DB-pager
# Also group alerts by affected database.
group_by: [alertname, cluster, database]
routes:
- match:
owner: team-X
receiver: team-X-pager
- match:
owner: team-Y
receiver: team-Y-pager
# Inhibition rules allow to mute a set of alerts given that another alert is
# firing.
# We use this to mute any warning-level notifications if the same alert is
# already critical.
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
# Apply inhibition if the alertname is the same.
equal: ['alertname', 'cluster', 'service']
receivers:
- name: 'team-X-mails'
email_configs:
- to: 'team-X+alerts@example.org'
- name: 'team-X-pager'
email_configs:
- to: 'team-X+alerts-critical@example.org'
pagerduty_configs:
- service_key: <team-X-key>
- name: 'team-Y-mails'
email_configs:
- to: 'team-Y+alerts@example.org'
- name: 'team-Y-pager'
pagerduty_configs:
- service_key: <team-Y-key>
- name: 'team-DB-pager'
pagerduty_configs:
- service_key: <team-DB-key>
- name: 'team-X-hipchat'
hipchat_configs:
- auth_token: <auth_token>
room_id: 85
message_format: html
notify: true

View File

@ -0,0 +1,48 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Appends annotations for configuring prometheus scrape endpoints via
# annotations. The required annotations are:
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
{{- define "helm-toolkit.snippets.prometheus_service_annotations" -}}
{{- $endpoint := index . 0 -}}
{{- $context := index . 1 -}}
prometheus.io/scrape: {{ $endpoint.scrape | quote }}
prometheus.io/scheme: {{ $endpoint.scheme.default | quote }}
prometheus.io/path: {{ $endpoint.path.default | quote }}
prometheus.io/port: {{ $endpoint.scrape_port | quote }}
{{- end -}}
# Appends annotations for configuring prometheus scrape jobs via pod
# annotations. The required annotations are:
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
# pod's declared ports (default is a port-free target if none are declared).
{{- define "helm-toolkit.snippets.prometheus_pod_annotations" -}}
{{- $pod := index . 0 -}}
{{- $context := index . 1 -}}
prometheus.io/scrape: {{ $pod.scrape | quote }}
prometheus.io/path: {{ $pod.path.default | quote }}
prometheus.io/port: {{ $pod.scrape_port | quote }}
{{- end -}}

View File

@ -0,0 +1,24 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
description: OpenStack-Helm Kube-State-Metrics
name: kube-state-metrics
version: 0.1.0
home: https://github.com/kubernetes/kube-state-metrics
sources:
- https://github.com/kubernetes/kube-state-metrics
- https://git.openstack.org/cgit/openstack/openstack-helm-infra
maintainers:
- name: OpenStack-Helm Authors

View File

@ -0,0 +1,19 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dependencies:
- name: helm-toolkit
repository: http://localhost:8879/charts
version: 0.1.0

View File

@ -0,0 +1,64 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.clusterrole }}
{{- $envAll := . }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: kube-state-metrics-runner
rules:
- apiGroups:
- ""
resources:
- namespaces
- nodes
- persistentvolumeclaims
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
verbs:
- list
- watch
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- replicasets
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- list
- watch
{{- end }}

View File

@ -0,0 +1,32 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.clusterrolebinding }}
{{- $envAll := . }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: run-kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: kube-state-metrics-runner
apiGroup: rbac.authorization.k8s.io
{{- end }}

View File

@ -0,0 +1,27 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_bin }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: kube-metrics-bin
data:
image-repo-sync.sh: |+
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
{{- end }}

View File

@ -0,0 +1,52 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.deployment }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.kube_state_metrics .Values.conditional_dependencies.local_image_registry) -}}
{{- else -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.kube_state_metrics -}}
{{- end -}}
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: kube-state-metrics
spec:
replicas: {{ .Values.pod.replicas.kube_state_metrics }}
{{ tuple $envAll | include "helm-toolkit.snippets.kubernetes_upgrades_deployment" | indent 2 }}
template:
metadata:
labels:
{{ tuple $envAll "kube-state-metrics" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
serviceAccount: kube-state-metrics
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.kube_state_metrics.timeout | default "30" }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: kube-state-metrics
{{ tuple $envAll "kube_state_metrics" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.kube_state_metrics | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
ports:
- name: metrics
containerPort: {{ .Values.network.kube_state_metrics.port }}
volumes:
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
{{- end }}

View File

@ -0,0 +1,65 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.job_image_repo_sync }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}}
---
apiVersion: batch/v1
kind: Job
metadata:
name: kube-metrics-image-repo-sync
spec:
template:
metadata:
labels:
{{ tuple $envAll "kube-metrics" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: image-repo-sync
{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
- name: LOCAL_REPO
value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
- name: IMAGE_SYNC_LIST
value: "{{ include "helm-toolkit.utils.image_sync_list" . }}"
command:
- /tmp/image-repo-sync.sh
volumeMounts:
- name: kube-metrics-bin
mountPath: /tmp/image-repo-sync.sh
subPath: image-repo-sync.sh
readOnly: true
- name: docker-socket
mountPath: /var/run/docker.sock
volumes:
- name: kube-metrics-bin
configMap:
name: kube-metrics-bin
defaultMode: 0555
- name: docker-socket
hostPath:
path: /var/run/docker.sock
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,20 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.rbac_entrypoint }}
{{- $envAll := . }}
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}}
{{- end }}

View File

@ -0,0 +1,39 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service_controller_manager }}
{{- $envAll := . }}
{{- $endpoint := $envAll.Values.endpoints.kube_controller_manager }}
---
apiVersion: v1
kind: Service
metadata:
name: kube-controller-manager-discovery
labels:
component: kube-controller-manager
annotations:
{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}
spec:
selector:
component: kube-controller-manager
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics
port: 10252
targetPort: 10252
protocol: TCP
{{- end }}

View File

@ -0,0 +1,34 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service_kube_metrics }}
{{- $envAll := . }}
{{- $endpoint := $envAll.Values.endpoints.kube_metrics }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ tuple "kube_metrics" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
annotations:
{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}
spec:
ports:
- name: http
port: {{ .Values.network.kube_state_metrics.port }}
targetPort: 8080
selector:
{{ tuple $envAll "kube-state-metrics" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
{{- end }}

View File

@ -0,0 +1,39 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service_scheduler }}
{{- $envAll := . }}
{{- $endpoint := $envAll.Values.endpoints.kube_scheduler }}
---
apiVersion: v1
kind: Service
metadata:
name: kube-scheduler-discovery
labels:
component: kube-scheduler
annotations:
{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}
spec:
selector:
component: kube-scheduler
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics
port: 10251
targetPort: 10251
protocol: TCP
{{- end }}

View File

@ -0,0 +1,24 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.serviceaccount }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-state-metrics
{{- end }}

View File

@ -0,0 +1,149 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for kube-state-metrics.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
images:
tags:
kube_state_metrics: quay.io/coreos/kube-state-metrics:v1.0.1
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
mounts:
kube_state_metrics:
kube_state_metrics:
init_container: null
replicas:
kube_state_metrics: 1
prometheus: 1
lifecycle:
upgrades:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
kube_state_metrics:
timeout: 30
resources:
enabled: false
kube_state_metrics:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
dependencies:
image_repo_sync:
services:
- service: local_image_registry
endpoint: internal
conditional_dependencies:
local_image_registry:
jobs:
- kube-metrics-image-repo-sync
services:
- service: local_image_registry
endpoint: node
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
kube_metrics:
namespace: null
hosts:
default: kube-metrics
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
http:
default: 8080
scrape: true
scrape_port: 8080
kube_scheduler:
scheme:
default: 'http'
path:
default: /metrics
scrape: true
scrape_port: 10251
kube_controller_manager:
scheme:
default: 'http'
path:
default: /metrics
scrape: true
scrape_port: 10252
network:
kube_state_metrics:
port: 8080
manifests:
configmap_bin: true
clusterrole: true
clusterrolebinding: true
deployment: true
job_image_repo_sync: true
rbac_entrypoint: true
service_kube_metrics: true
service_controller_manager: true
service_scheduler: true
serviceaccount: true

24
node-exporter/Chart.yaml Normal file
View File

@ -0,0 +1,24 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
description: OpenStack-Helm Node Exporter
name: node-exporter
version: 0.1.0
home: https://github.com/prometheus/node_exporter
sources:
- https://github.com/prometheus/node_exporter
- https://git.openstack.org/cgit/openstack/openstack-helm-infra
maintainers:
- name: OpenStack-Helm Authors

View File

@ -0,0 +1,19 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dependencies:
- name: helm-toolkit
repository: http://localhost:8879/charts
version: 0.1.0

View File

@ -0,0 +1,32 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.clusterrolebinding }}
{{- $envAll := . }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: run-node-exporter
subjects:
- kind: ServiceAccount
name: node-exporter
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: cluster-admin
apiGroup: rbac.authorization.k8s.io
{{- end }}

View File

@ -0,0 +1,27 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_bin }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: node-exporter-bin
data:
image-repo-sync.sh: |+
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
{{- end }}

View File

@ -0,0 +1,68 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.daemonset }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.node_exporter .Values.conditional_dependencies.local_image_registry) -}}
{{- else -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.node_exporter -}}
{{- end -}}
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: node-exporter
namespace: {{ .Values.endpoints.node_metrics.namespace }}
spec:
{{ tuple $envAll "node_exporter" | include "helm-toolkit.snippets.kubernetes_upgrades_daemonset" | indent 2 }}
template:
metadata:
labels:
{{ tuple $envAll "node_exporter" "metrics" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
namespace: {{ .Values.endpoints.node_metrics.namespace }}
spec:
serviceAccount: node-exporter
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
hostNetwork: true
hostPID: true
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: node-exporter
{{ tuple $envAll "node_exporter" | include "helm-toolkit.snippets.image" | indent 10 }}
ports:
- name: metrics
containerPort: {{ .Values.network.node_exporter.port }}
hostPort: {{ .Values.network.node_exporter.port }}
{{ tuple $envAll $envAll.Values.pod.resources.node_exporter | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
volumeMounts:
- name: proc
mountPath: /host/proc
readOnly: true
- name: sys
mountPath: /host/sys
readOnly: true
volumes:
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
- name: proc
hostPath:
path: /proc
- name: sys
hostPath:
path: /sys
{{- end }}

View File

@ -0,0 +1,65 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.job_image_repo_sync }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}}
---
apiVersion: batch/v1
kind: Job
metadata:
name: node-exporter-image-repo-sync
spec:
template:
metadata:
labels:
{{ tuple $envAll "node-exporter" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: image-repo-sync
{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
- name: LOCAL_REPO
value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
- name: IMAGE_SYNC_LIST
value: "{{ include "helm-toolkit.utils.image_sync_list" . }}"
command:
- /tmp/image-repo-sync.sh
volumeMounts:
- name: node-exporter-bin
mountPath: /tmp/image-repo-sync.sh
subPath: image-repo-sync.sh
readOnly: true
- name: docker-socket
mountPath: /var/run/docker.sock
volumes:
- name: node-exporter-bin
configMap:
name: node-exporter-bin
defaultMode: 0555
- name: docker-socket
hostPath:
path: /var/run/docker.sock
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,20 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.rbac_entrypoint }}
{{- $envAll := . }}
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}}
{{- end }}

View File

@ -0,0 +1,37 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service }}
{{- $envAll := . }}
{{- $endpoint := $envAll.Values.endpoints.node_metrics }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ tuple "node_metrics" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
namespace: {{ .Values.endpoints.node_metrics.namespace }}
annotations:
{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}
spec:
type: ClusterIP
clusterIP: None
ports:
- name: metrics
port: {{ .Values.network.node_exporter.port }}
targetPort: {{ .Values.network.node_exporter.port }}
selector:
{{ tuple $envAll "node_exporter" "metrics" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
{{- end }}

View File

@ -0,0 +1,24 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.serviceaccount }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: node-exporter
{{- end }}

136
node-exporter/values.yaml Normal file
View File

@ -0,0 +1,136 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for node-exporter.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
images:
tags:
node_exporter: docker.io/prom/node-exporter:v0.15.0
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
mounts:
node_exporter:
node_exporter:
init_container: null
lifecycle:
upgrades:
daemonsets:
pod_replacement_strategy: RollingUpdate
node_exporter:
enabled: true
min_ready_seconds: 0
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
node_exporter:
timeout: 30
resources:
enabled: false
node_exporter:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
dependencies:
node_exporter:
services: null
image_repo_sync:
services:
- service: local_image_registry
endpoint: internal
conditional_dependencies:
local_image_registry:
jobs:
- node-exporter-image-repo-sync
services:
- service: local_image_registry
endpoint: node
network:
node_exporter:
port: 9100
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
node_metrics:
namespace: null
hosts:
default: node-exporter
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
metrics:
default: 9100
scrape: true
scrape_port: 9100
manifests:
configmap_bin: true
clusterrolebinding: true
daemonset: true
job_image_repo_sync: true
rbac_entrypoint: true
service: true
serviceaccount: true

24
prometheus/Chart.yaml Normal file
View File

@ -0,0 +1,24 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
description: OpenStack-Helm Prometheus
name: prometheus
version: 0.1.0
home: https://prometheus.io/
sources:
- https://github.com/prometheus/prometheus
- https://git.openstack.org/cgit/openstack/openstack-helm-infra
maintainers:
- name: OpenStack-Helm Authors

View File

@ -0,0 +1,18 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dependencies:
- name: helm-toolkit
repository: http://localhost:8879/charts
version: 0.1.0

View File

@ -0,0 +1,59 @@
#!/bin/bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex
function endpoints_up () {
endpoints_result=$(curl "${PROMETHEUS_ENDPOINT}/api/v1/query?query=up" \
| python -c "import sys, json; print json.load(sys.stdin)['status']")
if [ "$endpoints_result" = "success" ];
then
echo "PASS: Endpoints successfully queried!"
else
echo "FAIL: Endpoints not queried!";
exit 1;
fi
}
function get_targets () {
targets_result=$(curl "${PROMETHEUS_ENDPOINT}/api/v1/targets" \
| python -c "import sys, json; print json.load(sys.stdin)['status']")
if [ "$targets_result" = "success" ];
then
echo "PASS: Targets successfully queried!"
else
echo "FAIL: Endpoints not queried!";
exit 1;
fi
}
function get_alertmanagers () {
alertmanager=$(curl "${PROMETHEUS_ENDPOINT}/api/v1/alertmanagers" \
| python -c "import sys, json; print json.load(sys.stdin)['status']")
if [ "$alertmanager" = "success" ];
then
echo "PASS: Alertmanager successfully queried!"
else
echo "FAIL: Alertmanager not queried!";
exit 1;
fi
}
endpoints_up
get_targets
get_alertmanagers

View File

@ -0,0 +1,38 @@
#!/bin/sh
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex
COMMAND="${@:-start}"
function start () {
exec /bin/prometheus \
-config.file=/etc/config/prometheus.yml \
-alertmanager.url={{ tuple "alerts" "internal" "api" . | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }} \
-storage.local.path={{ .Values.conf.prometheus.storage.local.path }} \
-storage.local.retention={{ .Values.conf.prometheus.storage.local.retention }} \
-log.format={{ .Values.conf.prometheus.log.format | quote }} \
-log.level={{ .Values.conf.prometheus.log.level | quote }} \
-query.max-concurrency={{ .Values.conf.prometheus.query.max_concurrency }} \
-query.timeout={{ .Values.conf.prometheus.query.timeout }}
}
function stop () {
kill -TERM 1
}
$COMMAND

View File

@ -0,0 +1,46 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.clusterrole }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: prometheus-runner
rules:
- apiGroups:
- ""
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- nonResourceURLs:
- "/metrics"
verbs:
- get
{{- end }}

View File

@ -0,0 +1,32 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.clusterrolebinding }}
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: run-prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: prometheus-runner
apiGroup: rbac.authorization.k8s.io
{{- end }}

View File

@ -0,0 +1,31 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_bin }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-bin
data:
prometheus.sh: |
{{ tuple "bin/_prometheus.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
helm-tests.sh: |
{{ tuple "bin/_helm-tests.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
image-repo-sync.sh: |+
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
{{- end }}

View File

@ -0,0 +1,27 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_etc }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-etc
data:
prometheus.yml:
{{- toYaml .Values.conf.prometheus.scrape_configs | indent 4 }}
{{- end }}

View File

@ -0,0 +1,47 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.configmap_rules }}
{{- $envAll := . }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-rules
data:
alertmanager.rules:
{{ toYaml .Values.conf.prometheus.rules.alertmanager | indent 4 }}
etcd3.rules:
{{ toYaml .Values.conf.prometheus.rules.etcd3 | indent 4 }}
kube-apiserver.rules:
{{ toYaml .Values.conf.prometheus.rules.kube_apiserver | indent 4 }}
kube-controller-manager.rules:
{{ toYaml .Values.conf.prometheus.rules.kube_controller_manager | indent 4 }}
kubelet.rules:
{{ toYaml .Values.conf.prometheus.rules.kubelet | indent 4 }}
kubernetes.rules:
{{ toYaml .Values.conf.prometheus.rules.kubernetes | indent 4 }}
rabbitmq.rules:
{{ toYaml .Values.conf.prometheus.rules.rabbitmq | indent 4 }}
mysql.rules:
{{ toYaml .Values.conf.prometheus.rules.mysql | indent 4 }}
ceph.rules:
{{ toYaml .Values.conf.prometheus.rules.ceph | indent 4 }}
openstack.rules:
{{ toYaml .Values.conf.prometheus.rules.openstack | indent 4 }}
custom.rules:
{{ toYaml .Values.conf.prometheus.rules.custom | indent 4 }}
{{- end }}

View File

@ -0,0 +1,60 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.ingress_prometheus }}
{{- $envAll := . }}
{{- if .Values.network.prometheus.ingress.public }}
{{- $backendServiceType := "monitoring" }}
{{- $backendPort := "prom-metrics" }}
{{- $ingressName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
{{- $backendName := tuple $backendServiceType "internal" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
{{- $hostName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
{{- $hostNameNamespaced := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_namespaced_endpoint_lookup" }}
{{- $hostNameFull := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: {{ $ingressName }}
annotations:
kubernetes.io/ingress.class: "nginx"
ingress.kubernetes.io/rewrite-target: /
ingress.kubernetes.io/proxy-body-size: {{ .Values.network.prometheus.ingress.proxy_body_size }}
spec:
rules:
{{ if ne $hostNameNamespaced $hostNameFull }}
{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced $hostNameFull }}
- host: {{ $vHost }}
http:
paths:
- path: /
backend:
serviceName: {{ $backendName }}
servicePort: {{ $backendPort }}
{{- end }}
{{- else }}
{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced }}
- host: {{ $vHost }}
http:
paths:
- path: /
backend:
serviceName: {{ $backendName }}
servicePort: {{ $backendPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,65 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.job_image_repo_sync }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}}
---
apiVersion: batch/v1
kind: Job
metadata:
name: prometheus-image-repo-sync
spec:
template:
metadata:
labels:
{{ tuple $envAll "prometheus" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: image-repo-sync
{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
- name: LOCAL_REPO
value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
- name: IMAGE_SYNC_LIST
value: "{{ include "helm-toolkit.utils.image_sync_list" . }}"
command:
- /tmp/image-repo-sync.sh
volumeMounts:
- name: prometheus-bin
mountPath: /tmp/image-repo-sync.sh
subPath: image-repo-sync.sh
readOnly: true
- name: docker-socket
mountPath: /var/run/docker.sock
volumes:
- name: prometheus-bin
configMap:
name: prometheus-bin
defaultMode: 0555
- name: docker-socket
hostPath:
path: /var/run/docker.sock
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,46 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.helm_tests }}
{{- $envAll := . }}
---
apiVersion: v1
kind: Pod
metadata:
name: "{{.Release.Name}}-test"
annotations:
"helm.sh/hook": test-success
spec:
restartPolicy: Never
containers:
- name: {{.Release.Name}}-helm-tests
{{ tuple $envAll "helm_tests" | include "helm-toolkit.snippets.image" | indent 6 }}
command:
- /tmp/helm-tests.sh
env:
- name: PROMETHEUS_ENDPOINT
value: {{ tuple "monitoring" "internal" "api" $envAll | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }}
volumeMounts:
- name: prometheus-bin
mountPath: /tmp/helm-tests.sh
subPath: helm-tests.sh
readOnly: true
volumes:
- name: prometheus-bin
configMap:
name: prometheus-bin
defaultMode: 0555
{{- end }}

View File

@ -0,0 +1,31 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.pvc }}
{{- $envAll := . }}
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: {{ .Values.storage.pvc.name }}
spec:
accessModes:
- {{ .Values.storage.pvc.access_mode }}
resources:
requests:
storage: {{ .Values.storage.requests.storage }}
storageClassName: {{ .Values.storage.storage_class }}
{{- end }}

View File

@ -0,0 +1,20 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.rbac_entrypoint }}
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}}
{{- end }}

View File

@ -0,0 +1,32 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service_ingress_prometheus }}
{{- if .Values.network.prometheus.ingress.public }}
{{- $envAll := . }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
spec:
ports:
- name: http
port: 80
selector:
app: ingress-api
{{- end }}
{{- end }}

View File

@ -0,0 +1,39 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.service }}
{{- $envAll := . }}
{{- $endpoint := $envAll.Values.endpoints.monitoring }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ tuple "monitoring" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
annotations:
{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}
spec:
ports:
- name: prom-metrics
port: {{ .Values.network.prometheus.port }}
{{ if .Values.network.prometheus.node_port.enabled }}
nodePort: {{ .Values.network.prometheus.node_port.port }}
{{ end }}
selector:
{{ tuple $envAll "prometheus" "api" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
{{ if .Values.network.prometheus.node_port.enabled }}
type: NodePort
{{ end }}
{{- end }}

View File

@ -0,0 +1,22 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.serviceaccount }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
{{- end }}

View File

@ -0,0 +1,158 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.statefulset_prometheus }}
{{- $envAll := . }}
{{- if .Values.images.local_registry.active -}}
{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.prometheus .Values.conditional_dependencies.local_image_registry) -}}
{{- else -}}
{{- $_ := set .Values "pod_dependency" .Values.dependencies.prometheus -}}
{{- end -}}
{{- $mounts_prometheus := .Values.pod.mounts.prometheus.prometheus }}
{{- $mounts_prometheus_init := .Values.pod.mounts.prometheus.init_container }}
---
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
name: prometheus
spec:
serviceName: {{ tuple "monitoring" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
replicas: {{ .Values.pod.replicas.prometheus }}
template:
metadata:
labels:
{{ tuple $envAll "prometheus" "api" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
annotations:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
configmap-rules-hash: {{ tuple "configmap-rules.yaml" . | include "helm-toolkit.utils.hash" }}
spec:
serviceAccount: prometheus
affinity:
{{ tuple $envAll "prometheus" "api" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.prometheus.timeout | default "30" }}
initContainers:
{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: prometheus
{{ tuple $envAll "prometheus" | include "helm-toolkit.snippets.image" | indent 10 }}
command:
- /tmp/prometheus.sh
- start
lifecycle:
preStop:
exec:
command:
- /tmp/prometheus.sh
- stop
{{ tuple $envAll $envAll.Values.pod.resources.prometheus | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
ports:
- name: prom-metrics
containerPort: {{ .Values.network.prometheus.port }}
readinessProbe:
httpGet:
path: /status
port: {{ .Values.network.prometheus.port }}
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- name: etcprometheus
mountPath: /etc/config
- name: rulesprometheus
mountPath: /etc/config/rules
- name: prometheus-rules
mountPath: /etc/config/rules/alertmanager.rules
subPath: alertmanager.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/etcd3.rules
subPath: etcd3.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/kubernetes.rules
subPath: kubernetes.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/kube-apiserver.rules
subPath: kube-apiserver.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/kube-controller-manager.rules
subPath: kube-controller-manager.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/kubelet.rules
subPath: kubelet.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/rabbitmq.rules
subPath: rabbitmq.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/mysql.rules
subPath: mysql.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/ceph.rules
subPath: ceph.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/openstack.rules
subPath: openstack.rules
readOnly: true
- name: prometheus-rules
mountPath: /etc/config/rules/custom.rules
subPath: custom.rules
readOnly: true
- name: prometheus-etc
mountPath: /etc/config/prometheus.yml
subPath: prometheus.yml
readOnly: true
- name: prometheus-bin
mountPath: /tmp/prometheus.sh
subPath: prometheus.sh
readOnly: true
- name: storage
mountPath: /var/lib/prometheus/data
{{ if $mounts_prometheus.volumeMounts }}{{ toYaml $mounts_prometheus.volumeMounts | indent 12 }}{{ end }}
volumes:
{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }}
- name: etcprometheus
emptyDir: {}
- name: rulesprometheus
emptyDir: {}
- name: prometheus-rules
configMap:
name: prometheus-rules
- name: prometheus-etc
configMap:
name: prometheus-etc
- name: prometheus-bin
configMap:
name: prometheus-bin
defaultMode: 0555
{{- if .Values.storage.enabled }}
- name: storage
persistentVolumeClaim:
claimName: {{ .Values.storage.pvc.name }}
{{- else }}
- name: storage
emptyDir: {}
{{- end }}
{{ if $mounts_prometheus.volumes }}{{ toYaml $mounts_prometheus.volumes | indent 8 }}{{ end }}
{{- end }}

907
prometheus/values.yaml Normal file
View File

@ -0,0 +1,907 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for prometheus.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
images:
tags:
prometheus: docker.io/prom/prometheus:v1.7.1
helm_tests: docker.io/kolla/ubuntu-source-kolla-toolbox:3.0.3
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
mounts:
prometheus:
prometheus:
init_container: null
replicas:
prometheus: 1
lifecycle:
upgrades:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
prometheus:
timeout: 30
resources:
enabled: false
prometheus:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
monitoring:
name: prometheus
namespace: null
hosts:
default: prom-metrics
public: prometheus
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9090
public: 80
scrape: true
scrape_port: 9090
alerts:
name: alertmanager
namespace: null
hosts:
default: alerts-api
public: alertmanager
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9093
public: 80
dependencies:
prometheus:
services: null
image_repo_sync:
services:
- service: local_image_registry
endpoint: internal
conditional_dependencies:
local_image_registry:
jobs:
- prometheus-image-repo-sync
services:
- service: local_image_registry
endpoint: node
network:
prometheus:
ingress:
public: true
proxy_body_size: 1024M
node_port:
enabled: false
port: 30900
port: 9090
storage:
enabled: true
pvc:
name: prometheus-pvc
access_mode: ReadWriteMany
requests:
storage: 5Gi
storage_class: general
manifests:
clusterrole: true
clusterrolebinding: true
configmap_bin: true
configmap_etc: true
configmap_rules: true
ingress_prometheus: true
helm_tests: true
job_image_repo_sync: true
pvc: true
rbac_entrypoint: true
service_ingress_prometheus: true
service: true
serviceaccount: true
statefulset_prometheus: true
conf:
prometheus:
storage:
local:
path: /var/lib/prometheus/data
retention: 168h0m0s
log:
format: logger:stdout?json=true
level: info
query:
max_concurrency: 20
timeout: 2m0s
scrape_configs: |
global:
scrape_interval: 25s
evaluation_interval: 10s
rule_files:
- /etc/config/rules/alertmanager.rules
- /etc/config/rules/etcd3.rules
- /etc/config/rules/kubernetes.rules
- /etc/config/rules/kube-apiserver.rules
- /etc/config/rules/kube-controller-manager.rules
- /etc/config/rules/kubelet.rules
- /etc/config/rules/kube-scheduler.rules
- /etc/config/rules/rabbitmq.rules
- /etc/config/rules/mysql.rules
- /etc/config/rules/ceph.rules
- /etc/config/rules/openstack.rules
- /etc/config/rules/custom.rules
scrape_configs:
- job_name: kubelet
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
scrape_interval: 45s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_name]
action: replace
target_label: kubernetes_io_hostname
# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
# the --cadvisor-port=0 Kubelet flag).
#
# This job is not necessary and should be removed in Kubernetes 1.6 and
# earlier versions, or it will cause the metrics to be scraped twice.
- job_name: 'kubernetes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
scrape_interval: 45s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- source_labels: [__meta_kubernetes_node_name]
action: replace
target_label: kubernetes_io_hostname
metric_relabel_configs:
- action: replace
source_labels: [id]
regex: '^/machine\.slice/machine-rkt\\x2d([^\\]+)\\.+/([^/]+)\.service$'
target_label: rkt_container_name
replacement: '${2}-${1}'
- action: replace
source_labels: [id]
regex: '^/system\.slice/(.+)\.service$'
target_label: systemd_service_name
replacement: '${1}'
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'apiserver'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 45s
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
# insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- job_name: calico-etcd
honor_labels: false
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "calico-etcd"
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: kube-system
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: calico-etcd
replacement: ${1}
- target_label: endpoint
replacement: "calico-etcd"
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: endpoints
scheme: http
relabel_configs:
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: alerts-api
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: monitoring
- action: keep
source_labels:
- __meta_kubernetes_endpoint_port_name
regex: alerts-api
rules:
alertmanager: |-
ALERT AlertmanagerConfigInconsistent
IF count_values by (service) ("config_hash", alertmanager_config_hash)
/ on(service) group_left
label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "Alertmanager configurations are inconsistent",
description = "The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync."
}
ALERT AlertmanagerDownOrMissing
IF label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", "alertmanager", "(.*)")
/ on(job) group_right
sum by(job) (up) != 1
FOR 5m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Alertmanager down or not discovered",
description = "An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery."
}
ALERT FailedReload
IF alertmanager_config_last_reload_successful == 0
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Alertmanager configuration reload has failed",
description = "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}."
}
etcd3: |-
# general cluster availability
# alert if another failed member will result in an unavailable cluster
ALERT InsufficientMembers
IF count(up{job="etcd"} == 0) > (count(up{job="etcd"}) / 2 - 1)
FOR 3m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "etcd cluster insufficient members",
description = "If one more etcd member goes down the cluster will be unavailable",
}
# etcd leader alerts
# ==================
# alert if any etcd instance has no leader
ALERT NoLeader
IF etcd_server_has_leader{job="etcd"} == 0
FOR 1m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "etcd member has no leader",
description = "etcd member {{ $labels.instance }} has no leader",
}
# alert if there are lots of leader changes
ALERT HighNumberOfLeaderChanges
IF increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of leader changes within the etcd cluster are happening",
description = "etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour",
}
# gRPC request alerts
# ===================
# alert if more than 1% of gRPC method calls have failed within the last 5 minutes
ALERT HighNumberOfFailedGRPCRequests
IF sum by(grpc_method) (rate(etcd_grpc_requests_failed_total{job="etcd"}[5m]))
/ sum by(grpc_method) (rate(etcd_grpc_total{job="etcd"}[5m])) > 0.01
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of gRPC requests are failing",
description = "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if more than 5% of gRPC method calls have failed within the last 5 minutes
ALERT HighNumberOfFailedGRPCRequests
IF sum by(grpc_method) (rate(etcd_grpc_requests_failed_total{job="etcd"}[5m]))
/ sum by(grpc_method) (rate(etcd_grpc_total{job="etcd"}[5m])) > 0.05
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "a high number of gRPC requests are failing",
description = "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if the 99th percentile of gRPC method calls take more than 150ms
ALERT GRPCRequestsSlow
IF histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) > 0.15
FOR 10m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "slow gRPC requests",
description = "on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method }} are slow",
}
# HTTP requests alerts
# ====================
# alert if more than 1% of requests to an HTTP endpoint have failed within the last 5 minutes
ALERT HighNumberOfFailedHTTPRequests
IF sum by(method) (rate(etcd_http_failed_total{job="etcd"}[5m]))
/ sum by(method) (rate(etcd_http_received_total{job="etcd"}[5m])) > 0.01
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of HTTP requests are failing",
description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if more than 5% of requests to an HTTP endpoint have failed within the last 5 minutes
ALERT HighNumberOfFailedHTTPRequests
IF sum by(method) (rate(etcd_http_failed_total{job="etcd"}[5m]))
/ sum by(method) (rate(etcd_http_received_total{job="etcd"}[5m])) > 0.05
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "a high number of HTTP requests are failing",
description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if the 99th percentile of HTTP requests take more than 150ms
ALERT HTTPRequestsSlow
IF histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "slow HTTP requests",
description = "on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow",
}
# etcd member communication alerts
# ================================
# alert if 99th percentile of round trips take 150ms
ALERT EtcdMemberCommunicationSlow
IF histogram_quantile(0.99, rate(etcd_network_member_round_trip_time_seconds_bucket[5m])) > 0.15
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "etcd member communication is slow",
description = "etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow",
}
# etcd proposal alerts
# ====================
# alert if there are several failed proposals within an hour
ALERT HighNumberOfFailedProposals
IF increase(etcd_server_proposals_failed_total{job="etcd"}[1h]) > 5
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of proposals within the etcd cluster are failing",
description = "etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour",
}
# etcd disk io latency alerts
# ===========================
# alert if 99th percentile of fsync durations is higher than 500ms
ALERT HighFsyncDurations
IF histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) > 0.5
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "high fsync durations",
description = "etcd instance {{ $labels.instance }} fync durations are high",
}
# alert if 99th percentile of commit durations is higher than 250ms
ALERT HighCommitDurations
IF histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) > 0.25
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "high commit durations",
description = "etcd instance {{ $labels.instance }} commit durations are high",
}
kube_apiserver: |-
ALERT K8SApiserverDown
IF absent(up{job="apiserver"} == 1)
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "API server unreachable",
description = "Prometheus failed to scrape API server(s), or all API servers have disappeared from service discovery.",
}
# Some verbs excluded because they are expected to be long-lasting:
# WATCHLIST is long-poll, CONNECT is `kubectl exec`.
#
# apiserver_request_latencies' unit is microseconds
ALERT K8SApiServerLatency
IF histogram_quantile(
0.99,
sum without (instance,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH|PROXY"})
) / 1e6 > 1.0
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Kubernetes apiserver latency is high",
description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.",
}
kube_controller_manager: |-
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"} == 1)
FOR 5m
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager",
}
kubelet: |-
ALERT K8SNodeNotReady
IF kube_node_status_ready{condition="true"} == 0
FOR 1h
LABELS {
severity = "warning",
}
ANNOTATIONS {
summary = "Node status is NotReady",
description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour",
}
ALERT K8SManyNodesNotReady
IF
count(kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count(kube_node_status_ready{condition="true"} == 0)
/
count(kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Many Kubernetes nodes are Not Ready",
description = "{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).",
}
ALERT K8SKubeletDown
IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
FOR 1h
LABELS {
severity = "warning",
}
ANNOTATIONS {
summary = "Many Kubelets cannot be scraped",
description = "Prometheus failed to scrape {{ $value }}% of kubelets.",
}
ALERT K8SKubeletDown
IF absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Many Kubelets cannot be scraped",
description = "Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets have disappeared from service discovery.",
}
ALERT K8SKubeletTooManyPods
IF kubelet_running_pod_count > 100
LABELS {
severity = "warning",
}
ANNOTATIONS {
summary = "Kubelet is close to pod limit",
description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110",
}
kubernetes: |-
# NOTE: These rules were kindly contributed by the SoundCloud engineering team.
### Container resources ###
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_spec_memory_limit_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:spec_cpu_shares =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_spec_cpu_shares{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:cpu_usage:rate =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
irate(
container_cpu_usage_seconds_total{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_usage:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_usage_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_working_set:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_working_set_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_rss:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_rss{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_cache:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_cache{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:disk_usage:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_disk_usage_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_pagefaults:rate =
sum by (cluster,namespace,controller,pod_name,container_name,scope,type) (
label_replace(
irate(
container_memory_failures_total{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_oom:rate =
sum by (cluster,namespace,controller,pod_name,container_name,scope,type) (
label_replace(
irate(
container_memory_failcnt{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
### Cluster resources ###
cluster:memory_allocation:percent =
100 * sum by (cluster) (
container_spec_memory_limit_bytes{pod_name!=""}
) / sum by (cluster) (
machine_memory_bytes
)
cluster:memory_used:percent =
100 * sum by (cluster) (
container_memory_usage_bytes{pod_name!=""}
) / sum by (cluster) (
machine_memory_bytes
)
cluster:cpu_allocation:percent =
100 * sum by (cluster) (
container_spec_cpu_shares{pod_name!=""}
) / sum by (cluster) (
container_spec_cpu_shares{id="/"} * on(cluster,instance) machine_cpu_cores
)
cluster:node_cpu_use:percent =
100 * sum by (cluster) (
rate(node_cpu{mode!="idle"}[5m])
) / sum by (cluster) (
machine_cpu_cores
)
### API latency ###
# Raw metrics are in microseconds. Convert to seconds.
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(
0.99,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(
0.9,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(
0.5,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
### Scheduling latency ###
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
rabbitmq: |-
mysql: |-
ceph: |-
openstack: |-
custom: |-

View File

@ -20,6 +20,14 @@ chart_groups:
- docker_registry_redis
- docker_registry
- name: infra_monitoring
timeout: 600
charts:
- prometheus
- node_exporter
- kube_state_metrics
- alertmanager
charts:
docker_registry_nfs_provisioner:
chart_name: nfs-provisioner
@ -59,3 +67,58 @@ charts:
node_selector_value: primary
volume:
class_name: openstack-helm-bootstrap
prometheus:
chart_name: prometheus
release: prometheus
namespace: openstack
timeout: 300
test:
enabled: true
timeout: 300
output: false
values:
storage:
enabled: false
manifests:
pvc: false
network:
prometheus:
ingress:
public: false
kube_state_metrics:
chart_name: kube-state-metrics
release: prometheus-kube-metrics
namespace: kube-system
test:
enabled: false
timeout: 300
output: false
node_exporter:
chart_name: node-exporter
release: prometheus-node-exporter
namespace: kube-system
test:
enabled: false
timeout: 300
output: false
alertmanager:
chart_name: alertmanager
release: prometheus-alertmanager
namespace: openstack
test:
enabled: false
timeout: 300
output: false
values:
storage:
enabled: false
manifests:
pvc: false
network:
alertmanager:
ingress:
public: false