Update magnum k8s monitoring infra
* Prometheus-server now runs only on master nodes. * Update prometheus-operator helm chart and tag. * Update prometheus-adapter version. * Deprecation notice for prometheus_monitoring component. task: 41569 story: 2006765 Signed-off-by: Diogo Guerra <diogo.filipe.tomas.guerra@cern.ch> Change-Id: I05e8c2be4e4c8e66a166b485ec7851875dca8b1c
This commit is contained in:
parent
61c7f7b34b
commit
7b257e94b1
|
@ -1264,13 +1264,14 @@ _`container_infra_prefix`
|
|||
|
||||
Images that might be needed if 'monitoring_enabled' is 'true':
|
||||
|
||||
* quay.io/prometheus/alertmanager:v0.20.0
|
||||
* docker.io/squareup/ghostunnel:v1.5.2
|
||||
* docker.io/jettech/kube-webhook-certgen:v1.0.0
|
||||
* quay.io/coreos/prometheus-operator:v0.37.0
|
||||
* quay.io/coreos/configmap-reload:v0.0.1
|
||||
* quay.io/coreos/prometheus-config-reloader:v0.37.0
|
||||
* quay.io/prometheus/prometheus:v2.15.2
|
||||
* quay.io/prometheus/alertmanager:v0.21.0
|
||||
* docker.io/jettech/kube-webhook-certgen:v1.5.0
|
||||
* quay.io/prometheus-operator/prometheus-operator:v0.44.0
|
||||
* docker.io/jimmidyson/configmap-reload:v0.4.0
|
||||
* quay.io/prometheus-operator/prometheus-config-reloader:v0.44.0
|
||||
* quay.io/prometheus/prometheus:v2.22.1
|
||||
* quay.io/prometheus/node-exporter:v1.0.1
|
||||
* docker.io/directxman12/k8s-prometheus-adapter:v0.8.2
|
||||
|
||||
Images that might be needed if 'cinder_csi_enabled' is 'true':
|
||||
|
||||
|
|
|
@ -35,15 +35,15 @@ _`metrics_server_enabled`
|
|||
|
||||
_`monitoring_enabled`
|
||||
Enable installation of cluster monitoring solution provided by the
|
||||
stable/prometheus-operator helm chart.
|
||||
prometheus-community/kube-prometheus-stack helm chart.
|
||||
To use this service tiller_enabled must be true when using
|
||||
helm_client_tag<v3.0.0.
|
||||
Default: false
|
||||
|
||||
_`prometheus_adapter_enabled`
|
||||
Enable installation of cluster custom metrics provided by the
|
||||
stable/prometheus-adapter helm chart. This service depends on
|
||||
monitoring_enabled.
|
||||
prometheus-community/prometheus-adapter helm chart.
|
||||
This service depends on monitoring_enabled.
|
||||
Default: true
|
||||
|
||||
To control deployed versions, extra labels are available:
|
||||
|
@ -56,14 +56,17 @@ _`metrics_server_chart_tag`
|
|||
|
||||
_`prometheus_operator_chart_tag`
|
||||
Add prometheus_operator_chart_tag to select the version of the
|
||||
stable/prometheus-operator chart to install. When installing the chart,
|
||||
helm will use the default values of the tag defined and overwrite them based
|
||||
on the prometheus-operator-config ConfigMap currently defined. You must
|
||||
certify that the versions are compatible.
|
||||
prometheus-community/kube-prometheus-stack chart to install.
|
||||
When installing the chart, helm will use the default values of the tag
|
||||
defined and overwrite them based on the prometheus-operator-config
|
||||
ConfigMap currently defined.
|
||||
You must verify that the versions are compatible.
|
||||
Wallaby-default: 17.2.0
|
||||
|
||||
_`prometheus_adapter_chart_tag`
|
||||
The stable/prometheus-adapter helm chart version to use.
|
||||
The prometheus-community/prometheus-adapter helm chart version to use.
|
||||
Train-default: 1.4.0
|
||||
Wallaby-default: 2.12.1
|
||||
|
||||
Full fledged cluster monitoring
|
||||
+++++++++++++++++++++++++++++++
|
||||
|
|
|
@ -21,10 +21,11 @@ EOF
|
|||
cat << EOF >> ${HELM_CHART_DIR}/values.yaml
|
||||
prometheus-adapter:
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-docker.io/directxman12/}k8s-prometheus-adapter-${ARCH}
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/prometheus-adapter/}prometheus-adapter
|
||||
priorityClassName: "system-cluster-critical"
|
||||
prometheus:
|
||||
url: http://web.tcp.prometheus-prometheus.kube-system.svc.cluster.local
|
||||
url: http://web.tcp.magnum-kube-prometheus-sta-prometheus.kube-system.svc.cluster.local
|
||||
path: /prometheus
|
||||
resources:
|
||||
requests:
|
||||
cpu: 150m
|
||||
|
|
|
@ -2,7 +2,7 @@ set +x
|
|||
. /etc/sysconfig/heat-params
|
||||
set -ex
|
||||
|
||||
CHART_NAME="prometheus-operator"
|
||||
CHART_NAME="kube-prometheus-stack"
|
||||
|
||||
if [ "$(echo ${MONITORING_ENABLED} | tr '[:upper:]' '[:lower:]')" = "true" ]; then
|
||||
echo "Writing ${CHART_NAME} config"
|
||||
|
@ -80,22 +80,18 @@ EOF
|
|||
PROTOCOL="http"
|
||||
INSECURE_SKIP_VERIFY="True"
|
||||
fi
|
||||
# FIXME: Force protocol to http as we don't want to use the cluster certs
|
||||
USE_HTTPS="False"
|
||||
|
||||
if [ "$(echo ${VERIFY_CA} | tr '[:upper:]' '[:lower:]')" == "false" ]; then
|
||||
INSECURE_SKIP_VERIFY="True"
|
||||
fi
|
||||
|
||||
cat << EOF >> ${HELM_CHART_DIR}/values.yaml
|
||||
prometheus-operator:
|
||||
|
||||
defaultRules:
|
||||
rules:
|
||||
#TODO: To enable this we need firstly take care of exposing certs
|
||||
etcd: false
|
||||
kube-prometheus-stack:
|
||||
|
||||
alertmanager:
|
||||
podDisruptionBudget:
|
||||
enabled: true
|
||||
#config:
|
||||
ingress:
|
||||
enabled: ${MONITORING_INGRESS_ENABLED}
|
||||
annotations:
|
||||
|
@ -108,6 +104,7 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS}
|
|||
- ${CLUSTER_ROOT_DOMAIN_NAME}
|
||||
paths:
|
||||
- /alertmanager${APP_INGRESS_PATH_APPEND}
|
||||
pathType: ImplementationSpecific
|
||||
## TLS configuration for Alertmanager Ingress
|
||||
## Secret must be manually created in the namespace
|
||||
tls: []
|
||||
|
@ -118,8 +115,8 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS}
|
|||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}alertmanager
|
||||
logFormat: json
|
||||
routePrefix: /alertmanager
|
||||
externalUrl: https://${CLUSTER_ROOT_DOMAIN_NAME}/alertmanager
|
||||
# routePrefix: /alertmanager
|
||||
# resources:
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
|
@ -127,15 +124,7 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS}
|
|||
priorityClassName: "system-cluster-critical"
|
||||
|
||||
grafana:
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-grafana/}grafana
|
||||
#enabled: ${ENABLE_GRAFANA}
|
||||
sidecar:
|
||||
image: ${CONTAINER_INFRA_PREFIX:-kiwigrid/}k8s-sidecar:0.1.99
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
adminPassword: ${GRAFANA_ADMIN_PASSWD}
|
||||
ingress:
|
||||
enabled: ${MONITORING_INGRESS_ENABLED}
|
||||
|
@ -146,13 +135,24 @@ ${APP_INGRESS_ANNOTATIONS}
|
|||
## Must be provided if Ingress is enable.
|
||||
hosts:
|
||||
- ${CLUSTER_ROOT_DOMAIN_NAME}
|
||||
path: /grafana${APP_INGRESS_PATH_APPEND}
|
||||
paths:
|
||||
- /grafana${APP_INGRESS_PATH_APPEND}
|
||||
pathType: ImplementationSpecific
|
||||
## TLS configuration for grafana Ingress
|
||||
## Secret must be manually created in the namespace
|
||||
tls: []
|
||||
# - secretName: grafana-general-tls
|
||||
# hosts:
|
||||
# - grafana.example.com
|
||||
sidecar:
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/kiwigrid/}k8s-sidecar
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-grafana/}grafana
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
persistence:
|
||||
enabled: ${APP_GRAFANA_PERSISTENT_STORAGE}
|
||||
storageClassName: ${MONITORING_STORAGE_CLASS_NAME}
|
||||
|
@ -162,21 +162,10 @@ ${APP_INGRESS_ANNOTATIONS}
|
|||
domain: ${CLUSTER_ROOT_DOMAIN_NAME}
|
||||
root_url: https://${CLUSTER_ROOT_DOMAIN_NAME}/grafana
|
||||
serve_from_sub_path: true
|
||||
paths:
|
||||
data: /var/lib/grafana/data
|
||||
logs: /var/log/grafana
|
||||
plugins: /var/lib/grafana/plugins
|
||||
provisioning: /etc/grafana/provisioning
|
||||
analytics:
|
||||
check_for_updates: true
|
||||
log:
|
||||
mode: console
|
||||
log.console:
|
||||
format: json
|
||||
grafana_net:
|
||||
url: https://grafana.net
|
||||
plugins:
|
||||
- grafana-piechart-panel
|
||||
|
||||
kubeApiServer:
|
||||
tlsConfig:
|
||||
|
@ -198,9 +187,9 @@ ${APP_INGRESS_ANNOTATIONS}
|
|||
serviceMonitor:
|
||||
## Enable scraping kube-controller-manager over https.
|
||||
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
|
||||
https: ${USE_HTTPS}
|
||||
https: "True"
|
||||
# Skip TLS certificate validation when scraping
|
||||
insecureSkipVerify: null
|
||||
insecureSkipVerify: "True"
|
||||
# Name of the server to use when validating TLS certificate
|
||||
serverName: null
|
||||
|
||||
|
@ -242,19 +231,21 @@ ${APP_INGRESS_ANNOTATIONS}
|
|||
serviceMonitor:
|
||||
## Enable scraping kube-scheduler over https.
|
||||
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
|
||||
https: ${USE_HTTPS}
|
||||
https: "True"
|
||||
## Skip TLS certificate validation when scraping
|
||||
insecureSkipVerify: null
|
||||
insecureSkipVerify: "True"
|
||||
## Name of the server to use when validating TLS certificate
|
||||
serverName: null
|
||||
|
||||
# kubeProxy:
|
||||
# ## If your kube proxy is not deployed as a pod, specify IPs it can be found on
|
||||
# endpoints: [] # masters + minions
|
||||
# serviceMonitor:
|
||||
# ## Enable scraping kube-proxy over https.
|
||||
# ## Requires proper certs (not self-signed) and delegated authentication/authorization checks
|
||||
# https: ${USE_HTTPS}
|
||||
kubeProxy:
|
||||
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
|
||||
endpoints: ${KUBE_MASTERS_PRIVATE} # masters + minions
|
||||
serviceMonitor:
|
||||
## Enable scraping kube-proxy over https.
|
||||
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
|
||||
https: "True"
|
||||
## Skip TLS certificate validation when scraping
|
||||
insecureSkipVerify: "True"
|
||||
|
||||
kube-state-metrics:
|
||||
priorityClassName: "system-cluster-critical"
|
||||
|
@ -271,37 +262,34 @@ ${APP_INGRESS_ANNOTATIONS}
|
|||
limits:
|
||||
cpu: 20m
|
||||
memory: 20M
|
||||
extraArgs:
|
||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
|
||||
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
|
||||
sidecars: []
|
||||
## - name: nvidia-dcgm-exporter
|
||||
## image: nvidia/dcgm-exporter:1.4.3
|
||||
|
||||
prometheusOperator:
|
||||
priorityClassName: "system-cluster-critical"
|
||||
tlsProxy:
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-squareup/}ghostunnel
|
||||
admissionWebhooks:
|
||||
patch:
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-jettech/}kube-webhook-certgen
|
||||
priorityClassName: "system-cluster-critical"
|
||||
|
||||
resources: {}
|
||||
# requests:
|
||||
# cpu: 5m
|
||||
# memory: 10Mi
|
||||
resources:
|
||||
requests:
|
||||
cpu: 2m
|
||||
limits:
|
||||
memory: 30M
|
||||
# clusterDomain: ${CLUSTER_ROOT_DOMAIN_NAME}
|
||||
priorityClassName: "system-cluster-critical"
|
||||
logFormat: json
|
||||
logLevel: info
|
||||
resources:
|
||||
requests:
|
||||
cpu: 2m
|
||||
limits:
|
||||
memory: 32M
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/coreos/}prometheus-operator
|
||||
configmapReloadImage:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/coreos/}configmap-reload
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus-operator/}prometheus-operator
|
||||
prometheusDefaultBaseImage: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}prometheus
|
||||
alertmanagerDefaultBaseImage: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}alertmanager
|
||||
prometheusConfigReloaderImage:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/coreos/}prometheus-config-reloader
|
||||
hyperkubeImage:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-k8s.gcr.io/}hyperkube
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus-operator/}prometheus-config-reloader
|
||||
thanosImage:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/thanos/}thanos
|
||||
|
||||
prometheus:
|
||||
ingress:
|
||||
|
@ -317,6 +305,7 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS}
|
|||
- ${CLUSTER_ROOT_DOMAIN_NAME}
|
||||
paths:
|
||||
- /prometheus${APP_INGRESS_PATH_APPEND}
|
||||
pathType: ImplementationSpecific
|
||||
## TLS configuration for Prometheus Ingress
|
||||
## Secret must be manually created in the namespace
|
||||
tls: []
|
||||
|
@ -332,11 +321,13 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS}
|
|||
bearerTokenFile:
|
||||
prometheusSpec:
|
||||
scrapeInterval: ${MONITORING_INTERVAL_SECONDS}s
|
||||
scrapeInterval: 30s
|
||||
evaluationInterval: 30s
|
||||
image:
|
||||
repository: ${CONTAINER_INFRA_PREFIX:-quay.io/prometheus/}prometheus
|
||||
retention: 14d
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/master"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
externalLabels:
|
||||
cluster_uuid: ${CLUSTER_UUID}
|
||||
externalUrl: https://${CLUSTER_ROOT_DOMAIN_NAME}/prometheus
|
||||
|
@ -352,7 +343,16 @@ ${APP_INGRESS_BASIC_AUTH_ANNOTATIONS}
|
|||
retention: ${MONITORING_RETENTION_DAYS}d
|
||||
retentionSize: ${MONITORING_RETENTION_SIZE_GB}GB
|
||||
logFormat: json
|
||||
#routePrefix: /prometheus
|
||||
routePrefix: /prometheus
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: magnum.openstack.org/role
|
||||
operator: In
|
||||
values:
|
||||
- master
|
||||
resources:
|
||||
requests:
|
||||
cpu: ${PROMETHEUS_SERVER_CPU}m
|
||||
|
|
|
@ -740,8 +740,8 @@ parameters:
|
|||
|
||||
prometheus_operator_chart_tag:
|
||||
type: string
|
||||
description: The stable/prometheus-operator chart version to use.
|
||||
default: v8.12.13
|
||||
description: The prometheus-community/kube-prometheus-stack chart version to use.
|
||||
default: 17.2.0
|
||||
|
||||
prometheus_adapter_enabled:
|
||||
type: boolean
|
||||
|
@ -750,8 +750,8 @@ parameters:
|
|||
|
||||
prometheus_adapter_chart_tag:
|
||||
type: string
|
||||
description: The stable/prometheus-adapter chart version to use.
|
||||
default: 1.4.0
|
||||
description: The prometheus-community/prometheus-adapter chart version to use.
|
||||
# Matches the documented Wallaby default and the upgrade release note.
default: 2.12.1
|
||||
|
||||
prometheus_adapter_configmap:
|
||||
type: string
|
||||
|
@ -1051,6 +1051,10 @@ resources:
|
|||
- protocol: udp
|
||||
port_range_min: 8472
|
||||
port_range_max: 8472
|
||||
# Prometheus Server
|
||||
- protocol: tcp
|
||||
port_range_min: 9090
|
||||
port_range_max: 9090
|
||||
|
||||
secgroup_kube_minion:
|
||||
condition: create_cluster_resources
|
||||
|
|
|
@ -754,8 +754,8 @@ parameters:
|
|||
|
||||
prometheus_operator_chart_tag:
|
||||
type: string
|
||||
description: The stable/prometheus-operator chart version to use.
|
||||
default: v8.12.13
|
||||
description: The prometheus-community/kube-prometheus-stack chart version to use.
|
||||
# Matches the documented Wallaby default and the upgrade release note.
default: 17.2.0
|
||||
|
||||
prometheus_adapter_enabled:
|
||||
type: boolean
|
||||
|
@ -764,8 +764,8 @@ parameters:
|
|||
|
||||
prometheus_adapter_chart_tag:
|
||||
type: string
|
||||
description: The stable/prometheus-adapter chart version to use.
|
||||
default: 1.4.0
|
||||
description: The prometheus-community/prometheus-adapter chart version to use.
|
||||
# Matches the documented Wallaby default and the upgrade release note.
default: 2.12.1
|
||||
|
||||
prometheus_adapter_configmap:
|
||||
type: string
|
||||
|
@ -1082,6 +1082,10 @@ resources:
|
|||
- protocol: udp
|
||||
port_range_min: 8472
|
||||
port_range_max: 8472
|
||||
# Prometheus Server
|
||||
- protocol: tcp
|
||||
port_range_min: 9090
|
||||
port_range_max: 9090
|
||||
|
||||
secgroup_kube_minion:
|
||||
condition: create_cluster_resources
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
upgrade:
|
||||
- Prometheus-Adapter helm chart updated to 2.12.1 from 1.4.0.
|
||||
- Prometheus-Operator helm chart updated to kube-prometheus-stack:17.2.0
|
||||
from prometheus-operator:v8.12.13.
|
||||
- Prometheus-server now runs only on master nodes.
|
||||
|
||||
deprecations:
|
||||
- Enabling monitoring using the prometheus_monitoring label is deprecated
|
||||
and will be removed in the X cycle.
|
Loading…
Reference in New Issue