diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst index 6af9439bdd..a9faa220ff 100644 --- a/doc/source/user/index.rst +++ b/doc/source/user/index.rst @@ -309,6 +309,8 @@ the table are linked to more details elsewhere in the user guide. | `monitoring_enabled`_ | - true | false | | | - false | | +---------------------------------------+--------------------+---------------+ +| `prometheus_operator_chart_version`_ | see below | see below | ++---------------------------------------+--------------------+---------------+ | `swarm_strategy`_ | - spread | spread | | | - binpack | | | | - random | | @@ -1142,10 +1144,10 @@ _`container_infra_prefix` * gcr.io/google_containers/kubernetes-dashboard-amd64:v1.5.1 * gcr.io/google-containers/hyperkube:v1.12.1 * quay.io/coreos/configmap-reload:v0.0.1 - * quay.io/coreos/prometheus-config-reloader:v0.26.0 - * quay.io/coreos/prometheus-operator:v0.15.3 - * quay.io/prometheus/alertmanager:v0.15.3 - * quay.io/prometheus/prometheus:v2.5.0 + * quay.io/coreos/prometheus-config-reloader:v0.30.1 + * quay.io/coreos/prometheus-operator:v0.30.1 + * quay.io/prometheus/alertmanager:v0.17.0 + * quay.io/prometheus/prometheus:v2.9.1 * k8s.gcr.io/node-problem-detector:v0.6.2 * docker.io/planetlabs/draino:abf028a * docker.io/openstackmagnum/cluster-autoscaler:v1.0 @@ -1274,6 +1276,13 @@ _`monitoring_enabled` stable/prometheus-operator helm chart. Default: false +_`prometheus_operator_chart_version` + Set prometheus_operator_chart_version to select the version of the + stable/prometheus-operator chart to install. When installing the chart, + helm will use the default values of the selected chart version and overwrite them based + on the prometheus-operator-config ConfigMap currently defined. You must + verify that the versions are compatible. Default: 5.12.3 + _`tiller_enabled` If set to true, tiller will be deployed in the kube-system namespace. Defaults to false. 
diff --git a/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh b/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh index 1cb0882988..92a2ce8315 100644 --- a/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh +++ b/magnum/drivers/common/templates/kubernetes/fragments/write-heat-params-master.sh @@ -40,6 +40,7 @@ HEAT_PARAMS=/etc/sysconfig/heat-params CLUSTER_UUID="$CLUSTER_UUID" MAGNUM_URL="$MAGNUM_URL" MONITORING_ENABLED="$MONITORING_ENABLED" + PROMETHEUS_OPERATOR_CHART_VERSION="$PROMETHEUS_OPERATOR_CHART_VERSION" VOLUME_DRIVER="$VOLUME_DRIVER" REGION_NAME="$REGION_NAME" HTTP_PROXY="$HTTP_PROXY" diff --git a/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh b/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh index fc8a4cfd69..dafd1d6e96 100644 --- a/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh +++ b/magnum/drivers/common/templates/kubernetes/helm/prometheus-operator.sh @@ -10,10 +10,16 @@ printf "Starting to run ${step}\n" ### Configuration ############################################################################### CHART_NAME="prometheus-operator" -CHART_VERSION="0.1.31" +CHART_VERSION=${PROMETHEUS_OPERATOR_CHART_VERSION:-5.12.3} + if [ "$(echo ${MONITORING_ENABLED} | tr '[:upper:]' '[:lower:]')" = "true" ]; then + # Calculate resources needed to run the Prometheus Monitoring Solution + # MAX_NODE_COUNT so we can have metrics even if cluster scales + PROMETHEUS_SERVER_CPU=$(expr 128 + 7 \* ${MAX_NODE_COUNT} ) + PROMETHEUS_SERVER_RAM=$(expr 256 + 40 \* ${MAX_NODE_COUNT}) + # Validate if communication node <-> master is secure or insecure PROTOCOL="https" INSECURE_SKIP_VERIFY="False" @@ -53,11 +59,12 @@ data: done helm repo update - if [[ \$(helm history prometheus-operator | grep prometheus-operator) ]]; then + if [[ \$(helm history ${CHART_NAME} | grep ${CHART_NAME}) ]]; then echo "${CHART_NAME} already 
installed on server. Continue..." exit 0 else - helm install stable/${CHART_NAME} --namespace monitoring --name ${CHART_NAME} --version v${CHART_VERSION} --values /opt/magnum/install-${CHART_NAME}-values.yaml + # TODO: Set namespace to monitoring. This is needed as the Kubernetes default priorityClass can only be used in NS kube-system + helm install stable/${CHART_NAME} --namespace kube-system --name ${CHART_NAME} --version v${CHART_VERSION} --values /opt/magnum/install-${CHART_NAME}-values.yaml fi install-${CHART_NAME}-values.yaml: | @@ -68,10 +75,21 @@ data: alertmanagerSpec: image: repository: ${CONTAINER_INFRA_PREFIX:-quay.io/}prometheus/alertmanager + # # Needs testing + # resources: + # requests: + # cpu: 100m + # memory: 256Mi + priorityClassName: "system-cluster-critical" + # Dashboard grafana: #enabled: ${ENABLE_GRAFANA} + resources: + requests: + cpu: 100m + memory: 128Mi adminPassword: ${ADMIN_PASSWD} kubeApiServer: @@ -91,20 +109,35 @@ data: k8s-app: coredns kubeEtcd: - service: - port: 4001 - targetPort: 4001 - selector: - k8s-app: etcd-server + endpoints: + - ${KUBE_MASTER_IP} serviceMonitor: scheme: ${PROTOCOL} - insecureSkipVerify: ${INSECURE_SKIP_VERIFY} + insecureSkipVerify: true + serverName: ${KUBE_MASTER_IP} ## If Protocol is http this files should be neglected - caFile: ${CERT_DIR}/ca.crt - certFile: ${CERT_DIR}/kubelet.crt - keyFile: ${CERT_DIR}/kubelet.key + caFile: /etc/prometheus/secrets/etcd-certificates/ca.crt + certFile: /etc/prometheus/secrets/etcd-certificates/kubelet.crt + keyFile: /etc/prometheus/secrets/etcd-certificates/kubelet.key + + kube-state-metrics: + priorityClassName: "system-cluster-critical" + resources: + #Guaranteed + limits: + cpu: 50m + memory: 64M + + prometheus-node-exporter: + priorityClassName: "system-node-critical" + resources: + #Guaranteed + limits: + cpu: 20m + memory: 20M prometheusOperator: + priorityClassName: "system-cluster-critical" image: repository: 
${CONTAINER_INFRA_PREFIX:-quay.io/}coreos/prometheus-operator configmapReloadImage: @@ -116,9 +149,19 @@ data: prometheus: prometheusSpec: + scrapeInterval: 1m + evaluationInterval: 1m image: repository: ${CONTAINER_INFRA_PREFIX:-quay.io/}prometheus/prometheus retention: 14d + resources: + requests: + cpu: ${PROMETHEUS_SERVER_CPU}m + memory: ${PROMETHEUS_SERVER_RAM}M + # secrets: + # - etcd-certificates + priorityClassName: "system-cluster-critical" + --- apiVersion: batch/v1 kind: Job @@ -132,7 +175,7 @@ spec: serviceAccountName: tiller containers: - name: config-helm - image: docker.io/openstackmagnum/helm-client:dev + image: ${CONTAINER_INFRA_PREFIX:-docker.io/openstackmagnum/}helm-client:dev command: - bash args: diff --git a/magnum/drivers/heat/k8s_fedora_template_def.py b/magnum/drivers/heat/k8s_fedora_template_def.py index 20a171d30e..0a49e8337e 100644 --- a/magnum/drivers/heat/k8s_fedora_template_def.py +++ b/magnum/drivers/heat/k8s_fedora_template_def.py @@ -132,6 +132,7 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition): 'heat_container_agent_tag', 'keystone_auth_enabled', 'k8s_keystone_auth_tag', 'monitoring_enabled', + 'prometheus_operator_chart_version', 'tiller_enabled', 'tiller_tag', 'tiller_namespace', diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml index d315f2bedb..28ebbcf413 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml @@ -577,6 +577,11 @@ parameters: description: Enable or disable prometheus-operator monitoring solution. default: false + prometheus_operator_chart_version: + type: string + description: The stable/prometheus-operator chart version to use. 
+ default: 5.12.3 + project_id: type: string description: > @@ -929,6 +934,7 @@ resources: keystone_auth_enabled: {get_param: keystone_auth_enabled} k8s_keystone_auth_tag: {get_param: k8s_keystone_auth_tag} monitoring_enabled: {get_param: monitoring_enabled} + prometheus_operator_chart_version: {get_param: prometheus_operator_chart_version} project_id: {get_param: project_id} tiller_enabled: {get_param: tiller_enabled} tiller_tag: {get_param: tiller_tag} diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml index fb48d6b06f..5bae57bbd0 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml @@ -430,6 +430,10 @@ parameters: description: Enable or disable prometheus-operator monitoring solution. default: false + prometheus_operator_chart_version: + type: string + description: The stable/prometheus-operator chart version to use. 
+ project_id: type: string description: > @@ -613,6 +617,7 @@ resources: "$KEYSTONE_AUTH_ENABLED": {get_param: keystone_auth_enabled} "$K8S_KEYSTONE_AUTH_TAG": {get_param: k8s_keystone_auth_tag} "$MONITORING_ENABLED": {get_param: monitoring_enabled} + "$PROMETHEUS_OPERATOR_CHART_VERSION": {get_param: prometheus_operator_chart_version} "$PROJECT_ID": {get_param: project_id} "$EXTERNAL_NETWORK_ID": {get_param: external_network} "$TILLER_ENABLED": {get_param: tiller_enabled} diff --git a/magnum/tests/unit/drivers/test_template_definition.py b/magnum/tests/unit/drivers/test_template_definition.py index 068d2776f5..37138d88f8 100644 --- a/magnum/tests/unit/drivers/test_template_definition.py +++ b/magnum/tests/unit/drivers/test_template_definition.py @@ -510,6 +510,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'k8s_keystone_auth_tag') monitoring_enabled = mock_cluster.labels.get( 'monitoring_enabled') + prometheus_operator_chart_version = mock_cluster.labels.get( + 'prometheus_operator_chart_version') project_id = mock_cluster.project_id tiller_enabled = mock_cluster.labels.get( 'tiller_enabled') @@ -589,6 +591,7 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'keystone_auth_enabled': keystone_auth_enabled, 'k8s_keystone_auth_tag': k8s_keystone_auth_tag, 'monitoring_enabled': monitoring_enabled, + 'prometheus_operator_chart_version': prometheus_operator_chart_version, 'project_id': project_id, 'external_network': external_network_id, 'tiller_enabled': tiller_enabled, @@ -912,6 +915,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'k8s_keystone_auth_tag') monitoring_enabled = mock_cluster.labels.get( 'monitoring_enabled') + prometheus_operator_chart_version = mock_cluster.labels.get( + 'prometheus_operator_chart_version') project_id = mock_cluster.project_id tiller_enabled = mock_cluster.labels.get( 'tiller_enabled') @@ -993,6 +998,7 @@ class 
AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'keystone_auth_enabled': keystone_auth_enabled, 'k8s_keystone_auth_tag': k8s_keystone_auth_tag, 'monitoring_enabled': monitoring_enabled, + 'prometheus_operator_chart_version': prometheus_operator_chart_version, 'project_id': project_id, 'external_network': external_network_id, 'tiller_enabled': tiller_enabled, diff --git a/releasenotes/notes/helm-install-prometheus-operator-ea87752bc57a0945.yaml b/releasenotes/notes/helm-install-prometheus-operator-ea87752bc57a0945.yaml index 7a7424447a..83ffa541ea 100644 --- a/releasenotes/notes/helm-install-prometheus-operator-ea87752bc57a0945.yaml +++ b/releasenotes/notes/helm-install-prometheus-operator-ea87752bc57a0945.yaml @@ -5,4 +5,4 @@ features: solution by means of helm stable/prometheus-operator public chart. Defaults to false. grafana_admin_passwd label can be used to set grafana dashboard admin access password. If grafana_admin_passwd - is not set the password defaults to prom_operator. + is not set the password defaults to prom-operator.