diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst index 93bceff058..7f16eee280 100644 --- a/doc/source/user/index.rst +++ b/doc/source/user/index.rst @@ -320,6 +320,14 @@ the table are linked to more details elsewhere in the user guide. | `monitoring_enabled`_ | - true | false | | | - false | | +---------------------------------------+--------------------+---------------+ +| `monitoring_retention_days`_ | see below | see below | ++---------------------------------------+--------------------+---------------+ +| `monitoring_retention_size`_ | see below | see below | ++---------------------------------------+--------------------+---------------+ +| `monitoring_storage_class_name`_ | see below | see below | ++---------------------------------------+--------------------+---------------+ +| `monitoring_interval_seconds`_ | see below | see below | ++---------------------------------------+--------------------+---------------+ | `prometheus_operator_chart_tag`_ | see below | see below | +---------------------------------------+--------------------+---------------+ | `prometheus_adapter_enabled`_ | - true | true | @@ -1475,6 +1483,25 @@ _`monitoring_enabled` helm_client_tag master is secure or insecure PROTOCOL="https" INSECURE_SKIP_VERIFY="False" @@ -193,6 +198,7 @@ prometheus-operator: prometheus: prometheusSpec: + scrapeInterval: ${MONITORING_INTERVAL_SECONDS}s scrapeInterval: 30s evaluationInterval: 30s image: @@ -209,6 +215,8 @@ prometheus-operator: # - kube-controller-manager-certificates # - kube-scheduler-certificates # - kube-proxy-manager-certificates + retention: ${MONITORING_RETENTION_DAYS}d + retentionSize: ${MONITORING_RETENTION_SIZE_GB}GB resources: requests: cpu: ${PROMETHEUS_SERVER_CPU}m @@ -216,6 +224,21 @@ prometheus-operator: priorityClassName: "system-cluster-critical" EOF + ####################### + # Set up definitions for persistent storage using k8s storageClass + if [ "${MONITORING_STORAGE_CLASS_NAME}" != "" ]; then + cat << EOF >> 
${HELM_CHART_DIR}/values.yaml + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: ${MONITORING_STORAGE_CLASS_NAME} + accessModes: ["ReadWriteMany"] + resources: + requests: + storage: ${MONITORING_RETENTION_SIZE}Gi +EOF + fi #END PERSISTENT STORAGE CONFIG + ####################### # Set up definitions for ingress objects @@ -225,17 +248,17 @@ EOF : elif [ "${INGRESS_CONTROLLER}" == "traefik" ]; then cat << EOF >> ${HELM_CHART_DIR}/values.yaml - additionalServiceMonitors: - - name: prometheus-traefik-metrics - selector: - matchLabels: - k8s-app: traefik - namespaceSelector: - matchNames: - - kube-system - endpoints: - - path: /metrics - port: metrics + additionalServiceMonitors: + - name: prometheus-traefik-metrics + selector: + matchLabels: + k8s-app: traefik + namespaceSelector: + matchNames: + - kube-system + endpoints: + - path: /metrics + port: metrics EOF fi #END INGRESS diff --git a/magnum/drivers/heat/k8s_fedora_template_def.py b/magnum/drivers/heat/k8s_fedora_template_def.py index 94a5c6aa4a..c5f2e1e4c9 100644 --- a/magnum/drivers/heat/k8s_fedora_template_def.py +++ b/magnum/drivers/heat/k8s_fedora_template_def.py @@ -98,6 +98,10 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition): 'metrics_server_enabled', 'metrics_server_chart_tag', 'monitoring_enabled', + 'monitoring_retention_days', + 'monitoring_retention_size', + 'monitoring_interval_seconds', + 'monitoring_storage_class_name', 'prometheus_operator_chart_tag', 'prometheus_adapter_enabled', 'prometheus_adapter_chart_tag', diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml index 01b95b5408..df8aa091a8 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml @@ -694,6 +694,28 @@ parameters: description: Enable or disable prometheus-operator monitoring solution. 
default: false + monitoring_retention_days: + type: number + description: The number of time (in days) that prometheus metrics should be kept. + default: 14 + + monitoring_retention_size: + type: number + description: > + The maximum memory (in Gi) allowed to be used by prometheus server to store metrics. + default: 14 + + monitoring_interval_seconds: + type: number + description: > + The time interval (in seconds) between consecutive metric scrapings. + default: 30 + + monitoring_storage_class_name: + type: string + description: The kubernetes storage class name to use for the prometheus pvc. + default: "" + prometheus_operator_chart_tag: type: string description: The stable/prometheus-operator chart version to use. @@ -1228,6 +1250,10 @@ resources: keystone_auth_enabled: {get_param: keystone_auth_enabled} k8s_keystone_auth_tag: {get_param: k8s_keystone_auth_tag} monitoring_enabled: {get_param: monitoring_enabled} + monitoring_retention_days: {get_param: monitoring_retention_days} + monitoring_retention_size: {get_param: monitoring_retention_size} + monitoring_interval_seconds: {get_param: monitoring_interval_seconds} + monitoring_storage_class_name: {get_param: monitoring_storage_class_name} prometheus_operator_chart_tag: {get_param: prometheus_operator_chart_tag} prometheus_adapter_enabled: {get_param: prometheus_adapter_enabled} prometheus_adapter_chart_tag: {get_param: prometheus_adapter_chart_tag} diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml index 3aea250151..8f9fd1c538 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml @@ -463,6 +463,24 @@ parameters: type: boolean description: Enable or disable prometheus-operator monitoring solution. + monitoring_retention_days: + type: number + description: The number of time (in days) that prometheus metrics should be kept. 
+ + monitoring_retention_size: + type: number + description: > + The maximum memory (in Gi) allowed to be used by prometheus server to store metrics. + + monitoring_interval_seconds: + type: number + description: > + The time interval (in seconds) between consecutive metric scrapings. + + monitoring_storage_class_name: + type: string + description: The kubernetes storage class name to use for the prometheus pvc. + prometheus_operator_chart_tag: type: string description: The stable/prometheus-operator chart version to use. @@ -795,6 +813,10 @@ resources: "$KEYSTONE_AUTH_ENABLED": {get_param: keystone_auth_enabled} "$K8S_KEYSTONE_AUTH_TAG": {get_param: k8s_keystone_auth_tag} "$MONITORING_ENABLED": {get_param: monitoring_enabled} + "$MONITORING_RETENTION_DAYS": {get_param: monitoring_retention_days} + "$MONITORING_RETENTION_SIZE": {get_param: monitoring_retention_size} + "$MONITORING_INTERVAL_SECONDS": {get_param: monitoring_interval_seconds} + "$MONITORING_STORAGE_CLASS_NAME": {get_param: monitoring_storage_class_name} "$PROMETHEUS_OPERATOR_CHART_TAG": {get_param: prometheus_operator_chart_tag} "$PROMETHEUS_ADAPTER_ENABLED": {get_param: prometheus_adapter_enabled} "$PROMETHEUS_ADAPTER_CHART_TAG": {get_param: prometheus_adapter_chart_tag} diff --git a/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml index c8fd3d0c52..b61b1a5571 100644 --- a/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_coreos_v1/templates/kubecluster.yaml @@ -704,6 +704,28 @@ parameters: description: Enable or disable prometheus-operator monitoring solution. default: false + monitoring_retention_days: + type: number + description: The number of time (in days) that prometheus metrics should be kept. + default: 14 + + monitoring_retention_size: + type: number + description: > + The maximum memory (in Gi) allowed to be used by prometheus server to store metrics. 
+ default: 14 + + monitoring_interval_seconds: + type: number + description: > + The time interval (in seconds) between consecutive metric scrapings. + default: 30 + + monitoring_storage_class_name: + type: string + description: The kubernetes storage class name to use for the prometheus pvc. + default: "" + prometheus_operator_chart_tag: type: string description: The stable/prometheus-operator chart version to use. @@ -1256,6 +1278,10 @@ resources: keystone_auth_enabled: {get_param: keystone_auth_enabled} k8s_keystone_auth_tag: {get_param: k8s_keystone_auth_tag} monitoring_enabled: {get_param: monitoring_enabled} + monitoring_retention_days: {get_param: monitoring_retention_days} + monitoring_retention_size: {get_param: monitoring_retention_size} + monitoring_interval_seconds: {get_param: monitoring_interval_seconds} + monitoring_storage_class_name: {get_param: monitoring_storage_class_name} prometheus_operator_chart_tag: {get_param: prometheus_operator_chart_tag} prometheus_adapter_enabled: {get_param: prometheus_adapter_enabled} prometheus_adapter_chart_tag: {get_param: prometheus_adapter_chart_tag} diff --git a/magnum/drivers/k8s_fedora_coreos_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_coreos_v1/templates/kubemaster.yaml index f5b06c3f2d..ebc010a8b3 100644 --- a/magnum/drivers/k8s_fedora_coreos_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_coreos_v1/templates/kubemaster.yaml @@ -467,6 +467,24 @@ parameters: type: boolean description: Enable or disable prometheus-operator monitoring solution. + monitoring_retention_days: + type: number + description: The number of time (in days) that prometheus metrics should be kept. + + monitoring_retention_size: + type: number + description: > + The maximum memory (in Gi) allowed to be used by prometheus server to store metrics. + + monitoring_interval_seconds: + type: number + description: > + The time interval (in seconds) between consecutive metric scrapings. 
+ + monitoring_storage_class_name: + type: string + description: The kubernetes storage class name to use for the prometheus pvc. + prometheus_operator_chart_tag: type: string description: The stable/prometheus-operator chart version to use. @@ -814,6 +832,10 @@ resources: "$KEYSTONE_AUTH_ENABLED": {get_param: keystone_auth_enabled} "$K8S_KEYSTONE_AUTH_TAG": {get_param: k8s_keystone_auth_tag} "$MONITORING_ENABLED": {get_param: monitoring_enabled} + "$MONITORING_RETENTION_DAYS": {get_param: monitoring_retention_days} + "$MONITORING_RETENTION_SIZE": {get_param: monitoring_retention_size} + "$MONITORING_INTERVAL_SECONDS": {get_param: monitoring_interval_seconds} + "$MONITORING_STORAGE_CLASS_NAME": {get_param: monitoring_storage_class_name} "$PROMETHEUS_OPERATOR_CHART_TAG": {get_param: prometheus_operator_chart_tag} "$PROMETHEUS_ADAPTER_ENABLED": {get_param: prometheus_adapter_enabled} "$PROMETHEUS_ADAPTER_CHART_TAG": {get_param: prometheus_adapter_chart_tag} diff --git a/magnum/tests/unit/drivers/test_template_definition.py b/magnum/tests/unit/drivers/test_template_definition.py index 7440bfed9f..cda0a92eca 100644 --- a/magnum/tests/unit/drivers/test_template_definition.py +++ b/magnum/tests/unit/drivers/test_template_definition.py @@ -539,6 +539,14 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'k8s_keystone_auth_tag') monitoring_enabled = mock_cluster.labels.get( 'monitoring_enabled') + monitoring_retention_days = mock_cluster.labels.get( + 'monitoring_retention_days') + monitoring_retention_size = mock_cluster.labels.get( + 'monitoring_retention_size') + monitoring_interval_seconds = mock_cluster.labels.get( + 'monitoring_interval_seconds') + monitoring_storage_class_name = mock_cluster.labels.get( + 'monitoring_storage_class_name') prometheus_operator_chart_tag = mock_cluster.labels.get( 'prometheus_operator_chart_tag') prometheus_adapter_enabled = mock_cluster.labels.get( @@ -674,6 +682,10 @@ class 
AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'keystone_auth_enabled': keystone_auth_enabled, 'k8s_keystone_auth_tag': k8s_keystone_auth_tag, 'monitoring_enabled': monitoring_enabled, + 'monitoring_retention_days': monitoring_retention_days, + 'monitoring_retention_size': monitoring_retention_size, + 'monitoring_interval_seconds': monitoring_interval_seconds, + 'monitoring_storage_class_name': monitoring_storage_class_name, 'prometheus_operator_chart_tag': prometheus_operator_chart_tag, 'prometheus_adapter_enabled': prometheus_adapter_enabled, 'prometheus_adapter_chart_tag': prometheus_adapter_chart_tag, @@ -1070,6 +1082,14 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'k8s_keystone_auth_tag') monitoring_enabled = mock_cluster.labels.get( 'monitoring_enabled') + monitoring_retention_days = mock_cluster.labels.get( + 'monitoring_retention_days') + monitoring_retention_size = mock_cluster.labels.get( + 'monitoring_retention_size') + monitoring_interval_seconds = mock_cluster.labels.get( + 'monitoring_interval_seconds') + monitoring_storage_class_name = mock_cluster.labels.get( + 'monitoring_storage_class_name') prometheus_operator_chart_tag = mock_cluster.labels.get( 'prometheus_operator_chart_tag') prometheus_adapter_enabled = mock_cluster.labels.get( @@ -1208,6 +1228,10 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase): 'keystone_auth_enabled': keystone_auth_enabled, 'k8s_keystone_auth_tag': k8s_keystone_auth_tag, 'monitoring_enabled': monitoring_enabled, + 'monitoring_retention_days': monitoring_retention_days, + 'monitoring_retention_size': monitoring_retention_size, + 'monitoring_interval_seconds': monitoring_interval_seconds, + 'monitoring_storage_class_name': monitoring_storage_class_name, 'prometheus_operator_chart_tag': prometheus_operator_chart_tag, 'prometheus_adapter_enabled': prometheus_adapter_enabled, 'prometheus_adapter_chart_tag': prometheus_adapter_chart_tag, 
diff --git a/releasenotes/notes/monitoring_persistent_storage-c5857fc099bd2f65.yaml b/releasenotes/notes/monitoring_persistent_storage-c5857fc099bd2f65.yaml new file mode 100644 index 0000000000..cfeff4dfab --- /dev/null +++ b/releasenotes/notes/monitoring_persistent_storage-c5857fc099bd2f65.yaml @@ -0,0 +1,12 @@ +--- +features: + - | + Added monitoring_retention_days magnum label allowing user to specify + prometheus server scraped metrics retention days (default: 14). + Added monitoring_retention_size magnum label allowing user to specify + prometheus server metrics storage maximum size in Gi (default: 14). + Added monitoring_interval_seconds magnum label allowing user to specify + prometheus scrape frequency in seconds (default: 30). + Added monitoring_storage_class_name magnum label allowing user to specify + the storageClass to use as external retention for pod fail-over data + persistence.