diff --git a/grafana/templates/deployment.yaml b/grafana/templates/deployment.yaml index 6b9911d0c..9ee3fb0e0 100644 --- a/grafana/templates/deployment.yaml +++ b/grafana/templates/deployment.yaml @@ -93,6 +93,8 @@ spec: mountPath: /etc/grafana - name: pod-screenshots-grafana mountPath: /var/lib/grafana/png + - name: pod-dashboards-grafana + mountPath: /etc/grafana/dashboards - name: pod-provisioning-grafana mountPath: {{ .Values.conf.grafana.paths.provisioning }} - name: grafana-bin @@ -126,6 +128,8 @@ spec: emptyDir: {} - name: pod-screenshots-grafana emptyDir: {} + - name: pod-dashboards-grafana + emptyDir: {} - name: pod-provisioning-grafana emptyDir: {} - name: grafana-bin diff --git a/grafana/values.yaml b/grafana/values.yaml index 3e73da7a9..6118bb556 100644 --- a/grafana/values.yaml +++ b/grafana/values.yaml @@ -482,17589 +482,4 @@ conf: level: info grafana_net: url: https://grafana.net - dashboards: - prometheus: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: Prometheus which you want to monitor - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.6.0 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - - type: panel - id: text - name: Text - version: '' - annotations: - list: - - builtIn: 1 - datasource: "-- Grafana --" - enable: true - hide: true - iconColor: rgba(0, 211, 255, 1) - name: Annotations & Alerts - type: dashboard - - datasource: "${DS_PROMETHEUS}" - enable: true - expr: count(sum(up{instance="$instance"}) by (instance) < 1) - hide: false - iconColor: rgb(250, 44, 18) - limit: 100 - name: downage - showIn: 0 - step: 30s - tagKeys: instance - textFormat: prometheus down - titleFormat: Downage - type: alert - - datasource: "${DS_PROMETHEUS}" - enable: true - expr: 
sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) - by (instance) - hide: false - iconColor: "#fceaca" - limit: 100 - name: Reload - showIn: 0 - step: 5m - tagKeys: instance - tags: [] - titleFormat: Reload - type: tags - description: Dashboard for monitoring of Prometheus v2.x.x - editable: true - gnetId: 3681 - graphTooltip: 1 - hideControls: false - id: - links: - - icon: info - tags: [] - targetBlank: true - title: 'Dashboard''s Github ' - tooltip: Github repo of this dashboard - type: link - url: https://github.com/FUSAKLA/Prometheus2-grafana-dashboard - - icon: doc - tags: [] - targetBlank: true - title: Prometheus Docs - tooltip: '' - type: link - url: http://prometheus.io/docs/introduction/overview/ - refresh: 5m - rows: - - collapse: false - height: 161 - panels: - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - "#299c46" - - rgba(237, 129, 40, 0.89) - - "#bf1b00" - datasource: "${DS_PROMETHEUS}" - decimals: 1 - format: s - gauge: - maxValue: 1000000 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 41 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: time() - process_start_time_seconds{instance="$instance"} - format: time_series - instant: false - intervalFactor: 2 - refId: A - thresholds: '' - title: Uptime - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - "#299c46" - - rgba(237, 129, 40, 0.89) - - "#bf1b00" - 
datasource: "${DS_PROMETHEUS}" - format: short - gauge: - maxValue: 1000000 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 42 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: prometheus_tsdb_head_series{instance="$instance"} - format: time_series - instant: false - intervalFactor: 2 - refId: A - thresholds: '500000,800000,1000000' - title: Total count of time series - type: singlestat - valueFontSize: 150% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - "#299c46" - - rgba(237, 129, 40, 0.89) - - "#d44a3a" - datasource: "${DS_PROMETHEUS}" - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 48 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: version - targets: - - expr: prometheus_build_info{instance="$instance"} - format: table - instant: true - intervalFactor: 2 - refId: A - thresholds: '' - title: Version - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: 
false - colorValue: false - colors: - - "#299c46" - - rgba(237, 129, 40, 0.89) - - "#d44a3a" - datasource: "${DS_PROMETHEUS}" - decimals: 2 - format: ms - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 49 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: prometheus_tsdb_head_max_time{instance="$instance"} - prometheus_tsdb_head_min_time{instance="$instance"} - format: time_series - instant: true - intervalFactor: 2 - refId: A - thresholds: '' - title: Actual head block length - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - content: - height: '' - id: 50 - links: [] - mode: html - span: 1 - title: '' - transparent: true - type: text - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - "#e6522c" - - rgba(237, 129, 40, 0.89) - - "#299c46" - datasource: "${DS_PROMETHEUS}" - decimals: 1 - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 52 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: '2' - format: time_series - 
intervalFactor: 2 - refId: A - thresholds: '10,20' - title: '' - transparent: true - type: singlestat - valueFontSize: 200% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Header instance info - titleSize: h6 - - collapse: false - height: '250' - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 15 - legend: - avg: true - current: false - max: false - min: false - show: false - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: true - steppedLine: false - targets: - - expr: max(prometheus_engine_query_duration_seconds{instance="$instance"}) by - (instance, slice) - format: time_series - intervalFactor: 1 - legendFormat: max duration for {{slice}} - metric: prometheus_local_storage_rushed_mode - refId: A - step: 900 - thresholds: [] - timeFrom: - timeShift: - title: Query elapsed time - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: '' - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 17 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - 
spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(increase(prometheus_tsdb_head_series_created_total{instance="$instance"}[$aggregation_interval])) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: created on {{ instance }} - metric: prometheus_local_storage_maintain_series_duration_seconds_count - refId: A - step: 1800 - - expr: sum(increase(prometheus_tsdb_head_series_removed_total{instance="$instance"}[$aggregation_interval])) - by (instance) * -1 - format: time_series - intervalFactor: 2 - legendFormat: removed on {{ instance }} - refId: B - thresholds: [] - timeFrom: - timeShift: - title: Head series created/deleted - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 13 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: exceeded_sample_limit on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: A - step: 1800 - - expr: 
sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: duplicate_timestamp on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: B - step: 1800 - - expr: sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: out_of_bounds on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: C - step: 1800 - - expr: sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: out_of_order on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: D - step: 1800 - - expr: sum(increase(prometheus_rule_evaluation_failures_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: rule_evaluation_failure on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: G - step: 1800 - - expr: sum(increase(prometheus_tsdb_compactions_failed_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: tsdb_compactions_failed on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: K - step: 1800 - - expr: sum(increase(prometheus_tsdb_reloads_failures_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: tsdb_reloads_failures on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: L - step: 1800 - - expr: 
sum(increase(prometheus_tsdb_head_series_not_found{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: head_series_not_found on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: N - step: 1800 - - expr: sum(increase(prometheus_evaluator_iterations_missed_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: evaluator_iterations_missed on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: O - step: 1800 - - expr: sum(increase(prometheus_evaluator_iterations_skipped_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: evaluator_iterations_skipped on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: P - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Prometheus errors - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Main info - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - description: '' - editable: true - error: false - fill: 1 - grid: {} - id: 25 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: false - show: false - sort: max - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 
10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: prometheus_target_interval_length_seconds{instance="$instance",quantile="0.99"} - - 60 - format: time_series - interval: 2m - intervalFactor: 1 - legendFormat: "{{instance}}" - metric: '' - refId: A - step: 300 - thresholds: [] - timeFrom: - timeShift: - title: Scrape delay (counts with 1m scrape interval) - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - logBase: 1 - max: - min: - show: true - - format: short - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 14 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: Queue length - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_evaluator_duration_seconds{instance="$instance"}) by (instance, - quantile) - format: time_series - intervalFactor: 2 - legendFormat: Queue length - metric: prometheus_local_storage_indexing_queue_length - refId: B - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Rule evaulation duration - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: 
Scrape & rule duration - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 18 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(increase(http_requests_total{instance="$instance"}[$aggregation_interval])) - by (instance, handler) > 0 - format: time_series - intervalFactor: 2 - legendFormat: "{{ handler }} on {{ instance }}" - metric: '' - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Request count - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: none - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 16 - legend: - avg: false - current: false - hideEmpty: true - hideZero: true - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: 
max(sum(http_request_duration_microseconds{instance="$instance"}) by (instance, - handler, quantile)) by (instance, handler) > 0 - format: time_series - hide: false - intervalFactor: 2 - legendFormat: "{{ handler }} on {{ instance }}" - refId: B - thresholds: [] - timeFrom: - timeShift: - title: Request duration per handler - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: µs - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 19 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(increase(http_request_size_bytes{instance="$instance", quantile="0.99"}[$aggregation_interval])) - by (instance, handler) > 0 - format: time_series - hide: false - intervalFactor: 2 - legendFormat: "{{ handler }} in {{ instance }}" - refId: B - thresholds: [] - timeFrom: - timeShift: - title: Request size by handler - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Allocated bytes: "#F9BA8F" - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max 
count collector: "#bf1b00" - Max count harvester: "#bf1b00" - Max to persist: "#3F6833" - RSS: "#890F02" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 8 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/Max.*/" - fill: 0 - linewidth: 2 - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_engine_queries{instance="$instance"}) by (instance, handler) - format: time_series - intervalFactor: 2 - legendFormat: 'Current count ' - metric: last - refId: A - step: 1800 - - expr: sum(prometheus_engine_queries_concurrent_max{instance="$instance"}) by - (instance, handler) - format: time_series - intervalFactor: 2 - legendFormat: Max count - metric: last - refId: B - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Cont of concurent queries - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Requests & queries - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: - Alert queue capacity on o collector: "#bf1b00" - Alert queue capacity on o harvester: "#bf1b00" - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 20 - legend: - avg: false - current: false - max: false - min: false - show: false - 
total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/.*capacity.*/" - fill: 0 - linewidth: 2 - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_notifications_queue_capacity{instance="$instance"})by (instance) - format: time_series - intervalFactor: 2 - legendFormat: 'Alert queue capacity ' - metric: prometheus_local_storage_checkpoint_last_size_bytes - refId: A - step: 1800 - - expr: sum(prometheus_notifications_queue_length{instance="$instance"})by (instance) - format: time_series - intervalFactor: 2 - legendFormat: 'Alert queue size on ' - metric: prometheus_local_storage_checkpoint_last_size_bytes - refId: B - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Alert queue size - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 21 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_notifications_alertmanagers_discovered{instance="$instance"}) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: Checkpoint chunks written/s - metric: 
prometheus_local_storage_checkpoint_series_chunks_written_sum - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Count of discovered alertmanagers - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: none - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 39 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(increase(prometheus_notifications_dropped_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: notifications_dropped on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: F - step: 1800 - - expr: sum(increase(prometheus_rule_evaluation_failures_total{rule_type="alerting",instance="$instance"}[$aggregation_interval])) - by (rule_type,instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: rule_evaluation_failures on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Alerting errors - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: 
short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Alerting - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 45 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: increase(prometheus_target_sync_length_seconds_count{scrape_job="kubernetes-service-endpoints"}[$aggregation_interval]) - format: time_series - intervalFactor: 2 - legendFormat: Count of target synces - refId: A - step: 240 - thresholds: [] - timeFrom: - timeShift: - title: Kubernetes SD sync count - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 46 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: 
sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: exceeded_sample_limit on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: A - step: 1800 - - expr: sum(increase(prometheus_sd_file_read_errors_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: sd_file_read_error on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: E - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Service discovery errors - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Service discovery - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 36 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(increase(prometheus_tsdb_reloads_total{instance="$instance"}[30m])) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: "{{ instance }}" - refId: A - thresholds: [] - timeFrom: - timeShift: - title: Reloaded block from disk - tooltip: - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time 
- name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 5 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_tsdb_blocks_loaded{instance="$instance"}) by (instance) - format: time_series - intervalFactor: 2 - legendFormat: Loaded data blocks - metric: prometheus_local_storage_memory_chunkdescs - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Loaded data blocks - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 3 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: 
prometheus_tsdb_head_series{instance="$instance"} - format: time_series - intervalFactor: 2 - legendFormat: Time series count - metric: prometheus_local_storage_memory_series - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Time series total count - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 1 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(rate(prometheus_tsdb_head_samples_appended_total{instance="$instance"}[$aggregation_interval])) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: samples/s {{instance}} - metric: prometheus_local_storage_ingested_samples_total - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Samples Appended per second - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: '' - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: TSDB stats - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max 
to persist: "#3F6833" - To persist: "#9AC48A" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 2 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/Max.*/" - fill: 0 - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_tsdb_head_chunks{instance="$instance"}) by (instance) - format: time_series - hide: false - intervalFactor: 2 - legendFormat: Head chunk count - metric: prometheus_local_storage_memory_chunks - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Head chunks count - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 35 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: max(prometheus_tsdb_head_max_time{instance="$instance"}) by (instance) - - min(prometheus_tsdb_head_min_time{instance="$instance"}) by (instance) - format: time_series - intervalFactor: 2 - legendFormat: "{{ instance }}" - refId: A - thresholds: [] - timeFrom: - timeShift: - title: Length of head block - tooltip: - shared: true - sort: 0 - value_type: 
individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: ms - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 4 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(rate(prometheus_tsdb_head_chunks_created_total{instance="$instance"}[$aggregation_interval])) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: created on {{ instance }} - refId: B - - expr: sum(rate(prometheus_tsdb_head_chunks_removed_total{instance="$instance"}[$aggregation_interval])) - by (instance) * -1 - format: time_series - intervalFactor: 2 - legendFormat: deleted on {{ instance }} - refId: C - thresholds: [] - timeFrom: - timeShift: - title: Head Chunks Created/Deleted per second - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Head block stats - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 33 - legend: - avg: false - current: false - max: false - min: false - 
show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(increase(prometheus_tsdb_compaction_duration_sum{instance="$instance"}[30m]) - / increase(prometheus_tsdb_compaction_duration_count{instance="$instance"}[30m])) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: "{{ instance }}" - refId: B - thresholds: [] - timeFrom: - timeShift: - title: Compaction duration - tooltip: - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 34 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_tsdb_head_gc_duration_seconds{instance="$instance"}) by - (instance, quantile) - format: time_series - intervalFactor: 2 - legendFormat: "{{ quantile }} on {{ instance }}" - refId: A - thresholds: [] - timeFrom: - timeShift: - title: Go Garbage collection duration - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - 
dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 37 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(prometheus_tsdb_wal_truncate_duration_seconds{instance="$instance"}) - by (instance, quantile) - format: time_series - intervalFactor: 2 - legendFormat: "{{ quantile }} on {{ instance }}" - refId: A - thresholds: [] - timeFrom: - timeShift: - title: WAL truncate duration seconds - tooltip: - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 38 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: sum(tsdb_wal_fsync_duration_seconds{instance="$instance"}) by (instance, - quantile) - format: time_series - intervalFactor: 2 - legendFormat: "{{ quantile }} {{ instance }}" - refId: A - thresholds: [] - timeFrom: - timeShift: - title: WAL fsync duration seconds - tooltip: - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - 
show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Data maintenance - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: - Allocated bytes: "#7EB26D" - Allocated bytes - 1m max: "#BF1B00" - Allocated bytes - 1m min: "#BF1B00" - Allocated bytes - 5m max: "#BF1B00" - Allocated bytes - 5m min: "#BF1B00" - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - RSS: "#447EBC" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - decimals: - editable: true - error: false - fill: 1 - id: 6 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/-/" - fill: 0 - - alias: collector heap size - color: "#E0752D" - fill: 0 - linewidth: 2 - - alias: collector kubernetes memory limit - color: "#BF1B00" - fill: 0 - linewidth: 3 - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(process_resident_memory_bytes{instance="$instance"}) by (instance) - format: time_series - hide: false - intervalFactor: 2 - legendFormat: Total resident memory - {{instance}} - metric: process_resident_memory_bytes - refId: B - step: 1800 - - expr: sum(go_memstats_alloc_bytes{instance="$instance"}) by (instance) - format: time_series - hide: false - intervalFactor: 2 - legendFormat: Total llocated bytes - {{instance}} - metric: go_memstats_alloc_bytes - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Memory - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - 
min: - show: true - - aliasColors: - Allocated bytes: "#F9BA8F" - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - RSS: "#890F02" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 7 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: rate(go_memstats_alloc_bytes_total{instance="$instance"}[$aggregation_interval]) - format: time_series - intervalFactor: 2 - legendFormat: Allocated Bytes/s - metric: go_memstats_alloc_bytes - refId: A - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Allocations per second - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - id: 9 - legend: - alignAsTable: false - avg: false - current: false - hideEmpty: false - max: false - min: false - rightSide: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(rate(process_cpu_seconds_total{instance="$instance"}[$aggregation_interval])) - by (instance) - format: time_series - intervalFactor: 2 - legendFormat: 
CPU/s - metric: prometheus_local_storage_ingested_samples_total - refId: B - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: CPU per second - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: - - avg - yaxes: - - format: none - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: RAM&CPU - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: - Chunks: "#1F78C1" - Chunks to persist: "#508642" - Max chunks: "#052B51" - Max to persist: "#3F6833" - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 47 - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum(increase(net_conntrack_dialer_conn_failed_total{instance="$instance"}[$aggregation_interval])) - by (instance) > 0 - format: time_series - hide: false - interval: '' - intervalFactor: 2 - legendFormat: conntrack_dialer_conn_failed on {{ instance }} - metric: prometheus_local_storage_chunk_ops_total - refId: M - step: 1800 - thresholds: [] - timeFrom: - timeShift: - title: Net errors - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Contrac errors 
- titleSize: h6 - schemaVersion: 14 - style: dark - tags: - - prometheus - templating: - list: - - auto: true - auto_count: 30 - auto_min: 2m - current: - text: auto - value: "$__auto_interval" - hide: 0 - label: aggregation intarval - name: aggregation_interval - options: - - selected: true - text: auto - value: "$__auto_interval" - - selected: false - text: 1m - value: 1m - - selected: false - text: 10m - value: 10m - - selected: false - text: 30m - value: 30m - - selected: false - text: 1h - value: 1h - - selected: false - text: 6h - value: 6h - - selected: false - text: 12h - value: 12h - - selected: false - text: 1d - value: 1d - - selected: false - text: 7d - value: 7d - - selected: false - text: 14d - value: 14d - - selected: false - text: 30d - value: 30d - query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d - refresh: 2 - type: interval - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: Instance - multi: false - name: instance - options: [] - query: label_values(prometheus_build_info, instance) - refresh: 2 - regex: '' - sort: 2 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - current: - text: influxdb(heapster) - kokura - value: influxdb(heapster) - kokura - hide: 0 - label: InfluxDB datasource - name: influx_datasource - options: [] - query: influxdb - refresh: 1 - regex: '' - type: datasource - time: - from: now-7d - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: browser - title: Prometheus2.0 (v1.0.0 by FUSAKLA) - version: 8 - ceph_cluster: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: Prometheus.IO - 
type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: panel - id: singlestat - name: Singlestat - version: '' - - type: panel - id: graph - name: Graph - version: '' - - type: grafana - id: grafana - name: Grafana - version: 3.1.1 - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - id: - title: Ceph - Cluster - tags: - - ceph - - cluster - style: dark - timezone: browser - editable: true - hideControls: false - sharedCrosshair: false - rows: - - collapse: false - editable: true - height: 150px - panels: - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 21 - interval: 1m - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: ceph_health_status{application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - refId: A - step: 60 - thresholds: '1,1' - title: Status - transparent: false - type: singlestat - valueFontSize: 100% - valueMaps: - - op: "=" - text: N/A - value: 'null' - - op: "=" - text: HEALTHY - value: '0' - - op: "=" - text: WARNING - value: '1' - - op: "=" - text: CRITICAL - value: '2' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: 
"${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 22 - interval: 1m - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - targets: - - expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '' - title: Pools - transparent: false - type: singlestat - valueFontSize: 100% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: bytes - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 33 - interval: 1m - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - targets: - - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: 0.025,0.1 - title: Cluster 
Capacity - transparent: false - type: singlestat - valueFontSize: 100% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: bytes - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 34 - interval: 1m - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - targets: - - expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: 0.025,0.1 - title: Used Capacity - transparent: false - type: singlestat - valueFontSize: 100% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percentunit - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 23 - interval: 1m - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - 
span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '70,80' - title: Current Utilization - transparent: false - type: singlestat - valueFontSize: 100% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - title: New row - - collapse: false - editable: true - height: 100px - panels: - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 26 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '' - title: OSDs IN - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 40, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: 
false - thresholdLabels: false - thresholdMarkers: true - id: 27 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '1,1' - title: OSDs OUT - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 28 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '' - title: OSDs UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - 
value: 'null' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 40, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 29 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_up{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '1,1' - title: OSDs DOWN - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 30 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: 
rgb(31, 120, 193) - show: true - targets: - - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '250,300' - title: Average PGs per OSD - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - title: New row - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: - Available: "#EAB839" - Total Capacity: "#447EBC" - Used: "#BF1B00" - total_avail: "#6ED0E0" - total_space: "#7EB26D" - total_used: "#890F02" - bars: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 4 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '300' - id: 1 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 0 - links: [] - minSpan: - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: Total Capacity - fill: 0 - linewidth: 3 - stack: false - span: 4 - stack: true - steppedLine: false - targets: - - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Available - refId: A - step: 60 - - expr: ceph_cluster_total_used_bytes - interval: "$interval" - intervalFactor: 1 - legendFormat: Used - refId: B - step: 60 - - expr: ceph_cluster_total_bytes - interval: "$interval" - intervalFactor: 1 - legendFormat: Total Capacity - refId: C - step: 60 - timeFrom: - timeShift: - title: Capacity - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - 
format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: - Total Capacity: "#7EB26D" - Used: "#BF1B00" - total_avail: "#6ED0E0" - total_space: "#7EB26D" - total_used: "#890F02" - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - thresholdLine: false - height: '300' - id: 3 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - minSpan: - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: true - steppedLine: false - targets: - - expr: sum(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Write - refId: A - step: 60 - - expr: sum(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Read - refId: B - step: 60 - timeFrom: - timeShift: - title: IOPS - tooltip: - msResolution: true - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: none - label: '' - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '300' - id: 7 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - 
lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: true - steppedLine: false - targets: - - expr: sum(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Write - refId: A - step: 60 - - expr: sum(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Read - refId: B - step: 60 - timeFrom: - timeShift: - title: Throughput - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: Bps - label: - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - repeat: - showTitle: true - title: CLUSTER - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 18 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/^Total.*$/" - stack: false - span: 12 - stack: true - steppedLine: false - targets: - - expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Total - refId: A - step: 60 - timeFrom: - timeShift: - title: Objects in the Cluster - tooltip: - msResolution: false - shared: true - sort: 1 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - 
format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 19 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/^Total.*$/" - stack: false - span: 6 - stack: true - steppedLine: false - targets: - - expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Total - refId: A - step: 60 - - expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Active - refId: B - step: 60 - - expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Inconsistent - refId: C - step: 60 - - expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Creating - refId: D - step: 60 - - expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Recovering - refId: E - step: 60 - - expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Down - refId: F - step: 60 - timeFrom: - timeShift: - title: PGs - tooltip: - msResolution: false - shared: true - sort: 1 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: 
short - label: - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 20 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/^Total.*$/" - stack: false - span: 6 - stack: true - steppedLine: false - targets: - - expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Degraded - refId: A - step: 60 - - expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Stale - refId: B - step: 60 - - expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Undersized - refId: C - step: 60 - timeFrom: - timeShift: - title: Stuck PGs - tooltip: - msResolution: false - shared: true - sort: 1 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - title: New row - time: - from: now-1h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - 
query: prometheus - refresh: 1 - regex: '' - type: datasource - - current: {} - hide: 0 - label: Cluster - name: ceph_cluster - options: [] - type: query - query: label_values(ceph_health_status, release_group) - refresh: 1 - sort: 2 - datasource: "${DS_PROMETHEUS}" - - auto: true - auto_count: 10 - auto_min: 1m - current: - tags: [] - text: 1m - value: 1m - datasource: - hide: 0 - includeAll: false - label: Interval - multi: false - name: interval - options: - - selected: false - text: auto - value: "$__auto_interval" - - selected: true - text: 1m - value: 1m - - selected: false - text: 10m - value: 10m - - selected: false - text: 30m - value: 30m - - selected: false - text: 1h - value: 1h - - selected: false - text: 6h - value: 6h - - selected: false - text: 12h - value: 12h - - selected: false - text: 1d - value: 1d - - selected: false - text: 7d - value: 7d - - selected: false - text: 14d - value: 14d - - selected: false - text: 30d - value: 30d - query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d - refresh: 0 - type: interval - annotations: - list: [] - refresh: 5m - schemaVersion: 12 - version: 26 - links: [] - gnetId: 917 - description: "Ceph Cluster overview.\r\n" - ceph_osd: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: Prometheus.IO - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: panel - id: singlestat - name: Singlestat - version: '' - - type: panel - id: graph - name: Graph - version: '' - - type: grafana - id: grafana - name: Grafana - version: 3.1.1 - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - id: - title: Ceph - OSD - tags: - - ceph - - osd - style: dark - timezone: browser - editable: true - hideControls: false - sharedCrosshair: false - rows: - - collapse: false - editable: true - height: 100px - panels: - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 40, 40, 0.89) - - rgba(50, 172, 45, 0.97) - 
datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 6 - interval: - isNew: true - links: [] - mappingType: 2 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - - from: '0' - text: DOWN - to: '0.99' - - from: '0.99' - text: UP - to: '1' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: ceph_osd_up{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - refId: A - step: 60 - thresholds: '0,1' - timeFrom: - title: Status - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: DOWN - value: '0' - - op: "=" - text: UP - value: '1' - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 40, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 8 - interval: - isNew: true - links: [] - mappingType: 2 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - - from: '0' - text: OUT - to: '0.99' - - from: '0.99' - text: IN - to: '1' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - 
show: false - targets: - - expr: ceph_osd_in{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - refId: A - step: 60 - thresholds: '0,1' - timeFrom: - title: Available - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: DOWN - value: '0' - - op: "=" - text: UP - value: '1' - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 10 - interval: - isNew: true - links: [] - mappingType: 2 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - refId: A - step: 60 - thresholds: '0,1' - timeFrom: - title: Total OSDs - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: DOWN - value: '0' - - op: "=" - text: UP - value: '1' - - op: "=" - text: N/A - value: 'null' - valueName: current - title: New row - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: 250 - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: 300 - threshold2Color: rgba(234, 112, 112, 0.22) - 
thresholdLine: true - id: 5 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/^Average.*/" - fill: 0 - stack: false - span: 10 - stack: true - steppedLine: false - targets: - - expr: ceph_osd_numpg{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Number of PGs - {{ $osd }} - refId: A - step: 60 - - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Average Number of PGs in the Cluster - refId: B - step: 60 - timeFrom: - timeShift: - title: PGs - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 7 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: true - targets: - - expr: 
(ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"})*100 - interval: "$interval" - intervalFactor: 1 - legendFormat: '' - refId: A - step: 60 - thresholds: '60,80' - timeFrom: - title: Utilization - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - showTitle: true - title: 'OSD: $osd' - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 2 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 6 - stack: true - steppedLine: false - targets: - - expr: ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Used - {{ osd.$osd }} - metric: ceph_osd_used_bytes - refId: A - step: 60 - - expr: ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - hide: false - interval: "$interval" - intervalFactor: 1 - legendFormat: Available - {{ $osd }} - metric: ceph_osd_avail_bytes - refId: B - step: 60 - timeFrom: - timeShift: - title: OSD Storage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: bytes - label: - 
logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 5 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 9 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: false - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 2 - points: true - renderer: flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - legendFormat: Available - {{ $osd }} - metric: ceph_osd_avail_bytes - refId: A - step: 60 - timeFrom: - timeShift: - title: Utilization Variance - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: none - label: - logBase: 1 - max: - min: - show: true - - format: none - label: - logBase: 1 - max: - min: - show: true - time: - from: now-1h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - current: {} - hide: 0 - label: Cluster - name: ceph_cluster - options: [] - type: query - query: label_values(ceph_health_status, release_group) - refresh: 1 - 
sort: 2 - datasource: "${DS_PROMETHEUS}" - - auto: true - auto_count: 10 - auto_min: 1m - current: - selected: true - text: 1m - value: 1m - datasource: - hide: 0 - includeAll: false - label: Interval - multi: false - name: interval - options: - - selected: false - text: auto - value: "$__auto_interval" - - selected: true - text: 1m - value: 1m - - selected: false - text: 10m - value: 10m - - selected: false - text: 30m - value: 30m - - selected: false - text: 1h - value: 1h - - selected: false - text: 6h - value: 6h - - selected: false - text: 12h - value: 12h - - selected: false - text: 1d - value: 1d - - selected: false - text: 7d - value: 7d - - selected: false - text: 14d - value: 14d - - selected: false - text: 30d - value: 30d - query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d - refresh: 0 - type: interval - - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: OSD - multi: false - name: osd - options: [] - query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, ceph_daemon) - refresh: 1 - regex: '' - type: query - annotations: - list: [] - refresh: 15m - schemaVersion: 12 - version: 18 - links: [] - gnetId: 923 - description: CEPH OSD Status. 
- ceph_pool: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: Prometheus.IO - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: panel - id: graph - name: Graph - version: '' - - type: panel - id: singlestat - name: Singlestat - version: '' - - type: grafana - id: grafana - name: Grafana - version: 3.1.1 - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - id: - title: Ceph - Pools - tags: - - ceph - - pools - style: dark - timezone: browser - editable: true - hideControls: false - sharedCrosshair: false - rows: - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 4 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '' - id: 2 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - rightSide: true - show: true - total: false - values: true - lines: true - linewidth: 0 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: "/^Total.*$/" - fill: 0 - linewidth: 4 - stack: false - - alias: "/^Raw.*$/" - color: "#BF1B00" - fill: 0 - linewidth: 4 - span: 10 - stack: true - steppedLine: false - targets: - - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Total - {{ $pool }} - refId: A - step: 60 - - expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Used - {{ $pool }} - refId: B - step: 60 - - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - 
ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Available - {{ $pool }} - refId: C - step: 60 - - expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Raw - {{ $pool }} - refId: D - step: 60 - timeFrom: - timeShift: - title: "[[pool_name]] Pool Storage" - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: percentunit - gauge: - maxValue: 1 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 10 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} / ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}) - interval: "$interval" - intervalFactor: 1 - refId: A - step: 60 - thresholds: '' - title: "[[pool_name]] Pool Usage" - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - showTitle: true - title: 
'Pool: $pool' - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '' - id: 7 - isNew: true - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Objects - {{ $pool_name }} - refId: A - step: 60 - - expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - interval: "$interval" - intervalFactor: 1 - legendFormat: Dirty Objects - {{ $pool_name }} - refId: B - step: 60 - timeFrom: - timeShift: - title: Objects in Pool [[pool_name]] - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - thresholdLine: false - id: 4 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - 
seriesOverrides: [] - span: 6 - stack: true - steppedLine: false - targets: - - expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m]) - interval: "$interval" - intervalFactor: 1 - legendFormat: Read - {{ $pool_name }} - refId: B - step: 60 - - expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m]) - interval: "$interval" - intervalFactor: 1 - legendFormat: Write - {{ $pool_name }} - refId: A - step: 60 - timeFrom: - timeShift: - title: "[[pool_name]] Pool IOPS" - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: none - label: IOPS - logBase: 1 - max: - min: 0 - show: true - - format: short - label: IOPS - logBase: 1 - max: - min: 0 - show: false - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 5 - interval: "$interval" - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: true - steppedLine: false - targets: - - expr: irate(ceph_pool_rd_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m]) - interval: "$interval" - intervalFactor: 1 - legendFormat: Read Bytes - {{ $pool_name }} - refId: A - step: 60 - - expr: irate(ceph_pool_wr_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m]) - interval: "$interval" - intervalFactor: 1 - legendFormat: Written Bytes - {{ $pool_name }} - refId: B - step: 60 - timeFrom: - timeShift: - title: "[[pool_name]] Pool Throughput" - tooltip: - msResolution: 
false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - show: true - yaxes: - - format: Bps - label: - logBase: 1 - max: - min: 0 - show: true - - format: Bps - label: - logBase: 1 - max: - min: 0 - show: true - title: New row - time: - from: now-3h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - current: {} - hide: 0 - label: Cluster - name: ceph_cluster - options: [] - type: query - query: label_values(ceph_health_status, release_group) - refresh: 1 - sort: 2 - datasource: "${DS_PROMETHEUS}" - - auto: true - auto_count: 10 - auto_min: 1m - current: - selected: true - text: 1m - value: 1m - datasource: - hide: 0 - includeAll: false - label: Interval - multi: false - name: interval - options: - - selected: false - text: auto - value: "$__auto_interval" - - selected: true - text: 1m - value: 1m - - selected: false - text: 10m - value: 10m - - selected: false - text: 30m - value: 30m - - selected: false - text: 1h - value: 1h - - selected: false - text: 6h - value: 6h - - selected: false - text: 12h - value: 12h - - selected: false - text: 1d - value: 1d - - selected: false - text: 7d - value: 7d - - selected: false - text: 14d - value: 14d - - selected: false - text: 30d - value: 30d - query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d - refresh: 0 - type: interval - - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: Pool - multi: false - name: pool - options: [] - query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id) - refresh: 1 - regex: '' - type: query - - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - 
includeAll: false - label: Pool - multi: false - name: pool_name - options: [] - query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name) - refresh: 1 - regex: '' - type: query - annotations: - list: [] - refresh: 5m - schemaVersion: 12 - version: 22 - links: [] - gnetId: 926 - description: Ceph Pools dashboard. - elasticsearch: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.6.3 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: - - builtIn: 1 - datasource: "-- Grafana --" - enable: true - hide: true - iconColor: rgba(0, 211, 255, 1) - name: Annotations & Alerts - type: dashboard - editable: true - gnetId: 4358 - graphTooltip: 1 - hideControls: false - id: - links: [] - refresh: 5m - rows: - - collapse: false - height: - panels: - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(178, 49, 13, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 8 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 5 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: 
(sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="green"})*2)+sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="yellow"}) - format: time_series - intervalFactor: 3 - legendFormat: '' - metric: '' - refId: A - step: 40 - thresholds: '0,1,2' - title: Cluster health status - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: GREEN - value: '2' - - op: "=" - text: YELLOW - value: '1' - - op: "=" - text: RED - value: '0' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 10 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"}) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - metric: '' - refId: A - step: 40 - thresholds: '' - title: Nodes - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - 
thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 9 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_number_of_data_nodes{cluster="$cluster"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - metric: '' - refId: A - step: 40 - thresholds: '' - title: Data nodes - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - hideTimeOverride: true - id: 16 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_number_of_pending_tasks{cluster="$cluster"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - metric: '' - refId: A - step: 40 - thresholds: '' - title: Pending tasks - transparent: 
false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Cluster - titleSize: h6 - - collapse: false - height: '' - panels: - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 11 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - minSpan: 2 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - repeat: shard_type - span: 2.4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_active_primary_shards{cluster="$cluster"} - intervalFactor: 2 - legendFormat: '' - refId: A - step: 40 - thresholds: '' - title: active primary shards - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 39 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - minSpan: 2 - nullPointMode: connected - nullText: - postfix: 
'' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2.4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_active_shards{cluster="$cluster"} - intervalFactor: 2 - legendFormat: '' - refId: A - step: 40 - thresholds: '' - title: active shards - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 40 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - minSpan: 2 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2.4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_initializing_shards{cluster="$cluster"} - intervalFactor: 2 - legendFormat: '' - refId: A - step: 40 - thresholds: '' - title: initializing shards - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - 
thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 41 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - minSpan: 2 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2.4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_relocating_shards{cluster="$cluster"} - intervalFactor: 2 - legendFormat: '' - refId: A - step: 40 - thresholds: '' - title: relocating shards - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: '50' - id: 42 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - minSpan: 2 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2.4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: true - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - expr: elasticsearch_cluster_health_unassigned_shards{cluster="$cluster"} - intervalFactor: 2 - legendFormat: '' - refId: A - step: 40 - thresholds: '' - title: unassigned shards - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - repeat: - 
repeatIteration: - repeatRowId: - showTitle: true - title: Shards - titleSize: h6 - - collapse: false - height: - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 30 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_master_node="true",name=~"$node"} - format: time_series - instant: false - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }} - master" - metric: '' - refId: A - step: 10 - - expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_data_node="true",name=~"$node"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }} - data" - metric: '' - refId: B - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: CPU usage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: percent - label: CPU usage - logBase: 1 - max: 100 - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 0 - grid: {} - height: '400' - id: 31 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - sortDesc: true - total: false - values: true - 
lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: elasticsearch_jvm_memory_used_bytes{cluster="$cluster",name=~"$node",name=~"$node"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }} - used: {{area}}" - metric: '' - refId: A - step: 10 - - expr: elasticsearch_jvm_memory_committed_bytes{cluster="$cluster",name=~"$node",name=~"$node"} - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - committed: {{area}}" - refId: B - step: 10 - - expr: elasticsearch_jvm_memory_max_bytes{cluster="$cluster",name=~"$node",name=~"$node"} - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - max: {{area}}" - refId: C - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: JVM memory usage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: Memory - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 32 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: 
1-(elasticsearch_filesystem_data_available_bytes{cluster="$cluster"}/elasticsearch_filesystem_data_size_bytes{cluster="$cluster",name=~"$node"}) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }} - {{path}}" - metric: '' - refId: A - step: 10 - thresholds: - - colorMode: custom - fill: true - fillColor: rgba(216, 200, 27, 0.27) - op: gt - value: 0.8 - - colorMode: custom - fill: true - fillColor: rgba(234, 112, 112, 0.22) - op: gt - value: 0.9 - timeFrom: - timeShift: - title: Disk usage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: percentunit - label: Disk Usage % - logBase: 1 - max: 1 - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 47 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - sort: max - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: sent - transform: negative-Y - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: irate(elasticsearch_transport_tx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} -sent" - refId: D - step: 10 - - expr: irate(elasticsearch_transport_rx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} -received" - refId: C - step: 10 - thresholds: [] - timeFrom: - timeShift: - 
title: Network usage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: Bps - label: Bytes/sec - logBase: 1 - max: - min: - show: true - - format: pps - label: '' - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: System - titleSize: h6 - - collapse: false - height: '' - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 1 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: elasticsearch_indices_docs{cluster="$cluster",name=~"$node"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Documents count - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: Documents - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 24 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: 
false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Documents indexed rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: index calls/s - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 25 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: rate(elasticsearch_indices_docs_deleted{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Documents deleted rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - 
yaxes: - - format: short - label: Documents/s - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 26 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: rate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Documents merged rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: Documents/s - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Documents - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 48 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - sort: avg - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - 
pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }} - indexing" - metric: '' - refId: A - step: 4 - - expr: irate(elasticsearch_indices_search_query_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - query" - refId: B - step: 4 - - expr: irate(elasticsearch_indices_search_fetch_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - fetch" - refId: C - step: 4 - - expr: irate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - merges" - refId: D - step: 4 - - expr: irate(elasticsearch_indices_refresh_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - refresh" - refId: E - step: 4 - - expr: irate(elasticsearch_indices_flush_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - flush" - refId: F - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: Total Operations rate - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: Operations/s - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 49 - legend: - alignAsTable: 
true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - sort: avg - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ name }} - indexing" - metric: '' - refId: A - step: 4 - - expr: irate(elasticsearch_indices_search_query_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - query" - refId: B - step: 4 - - expr: irate(elasticsearch_indices_search_fetch_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - fetch" - refId: C - step: 4 - - expr: irate(elasticsearch_indices_merges_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - merges" - refId: D - step: 4 - - expr: irate(elasticsearch_indices_refresh_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - refresh" - refId: E - step: 4 - - expr: irate(elasticsearch_indices_flush_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{ name }} - flush" - refId: F - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: Total Operations time - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: 
ms - label: Time - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Total Operations stats - titleSize: h6 - - collapse: false - height: '' - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 33 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: 'rate(elasticsearch_indices_search_query_time_seconds{cluster="$cluster",name=~"$node"}[$interval]) ' - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: Query time - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: ms - label: Time - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 5 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - 
seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: Indexing time - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: ms - label: Time - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 3 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: rate(elasticsearch_indices_merges_total_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: Merging time - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: Time - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - 
showTitle: true - title: Times - titleSize: h6 - - collapse: false - height: - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 4 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: elasticsearch_indices_fielddata_memory_size_bytes{cluster="$cluster",name=~"$node"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Field data memory size - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: Memory - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 34 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: rate(elasticsearch_indices_fielddata_evictions{cluster="$cluster",name=~"$node"}[$interval]) 
- format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Field data evictions - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: Evictions/s - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 35 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: elasticsearch_indices_query_cache_memory_size_bytes{cluster="$cluster",name=~"$node"} - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Query cache size - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: Size - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 36 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false 
- hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: true - steppedLine: false - targets: - - expr: rate(elasticsearch_indices_query_cache_evictions{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}}" - metric: '' - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Query cache evictions - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: Evictions/s - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Caches - titleSize: h6 - - collapse: false - height: 728 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 45 - legend: - alignAsTable: true - avg: true - current: false - max: true - min: true - show: true - sort: avg - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: ' irate(elasticsearch_thread_pool_rejected_count{cluster="$cluster",name=~"$node"}[$interval])' - format: time_series - intervalFactor: 2 - legendFormat: "{{name}} - {{ type }}" - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Thread Pool operations rejected - tooltip: - msResolution: false - shared: true - 
sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 46 - legend: - alignAsTable: true - avg: true - current: false - max: true - min: true - show: true - sort: avg - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"} - format: time_series - intervalFactor: 2 - legendFormat: "{{name}} - {{ type }}" - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Thread Pool operations queued - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - height: '' - id: 43 - legend: - alignAsTable: true - avg: true - current: false - max: true - min: true - show: true - sort: avg - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: 
elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"} - format: time_series - intervalFactor: 2 - legendFormat: "{{name}} - {{ type }}" - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Thread Pool threads active - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - id: 44 - legend: - alignAsTable: true - avg: true - current: false - max: true - min: true - show: true - sort: avg - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: irate(elasticsearch_thread_pool_completed_count{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - intervalFactor: 2 - legendFormat: "{{name}} - {{ type }}" - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Thread Pool operations completed - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Thread Pool - titleSize: h6 - - collapse: false - height: - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - 
height: '400' - id: 7 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: true - steppedLine: false - targets: - - expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}} - {{gc}}" - metric: '' - refId: A - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: GC count - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: GCs - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - height: '400' - id: 27 - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: true - min: true - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval]) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{name}} - {{gc}}" - metric: '' - refId: A - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: GC time - tooltip: - msResolution: false - shared: 
true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - label: Time - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: JVM Garbage Collection - titleSize: h6 - schemaVersion: 14 - style: dark - tags: - - elasticsearch - - App - templating: - list: - - auto: true - auto_count: 30 - auto_min: 10s - current: - text: auto - value: "$__auto_interval" - hide: 0 - label: Interval - name: interval - options: - - selected: true - text: auto - value: "$__auto_interval" - - selected: false - text: 1m - value: 1m - - selected: false - text: 10m - value: 10m - - selected: false - text: 30m - value: 30m - - selected: false - text: 1h - value: 1h - - selected: false - text: 6h - value: 6h - - selected: false - text: 12h - value: 12h - - selected: false - text: 1d - value: 1d - - selected: false - text: 7d - value: 7d - - selected: false - text: 14d - value: 14d - - selected: false - text: 30d - value: 30d - query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d - refresh: 2 - type: interval - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: Instance - multi: false - name: cluster - options: [] - query: label_values(elasticsearch_cluster_health_status,cluster) - refresh: 1 - regex: '' - sort: 1 - tagValuesQuery: - tags: [] - tagsQuery: - type: query - useTags: false - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: true - label: node - multi: true - name: node - options: [] - query: label_values(elasticsearch_process_cpu_percent,name) - refresh: 1 - regex: '' - sort: 1 - tagValuesQuery: 
- tags: [] - tagsQuery: - type: query - useTags: false - time: - from: now-12h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: browser - title: Elasticsearch - version: 1 - description: Elasticsearch detailed dashboard - hosts_containers: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: panel - id: graph - name: Graph - version: '' - - type: panel - id: singlestat - name: Singlestat - version: '' - - type: grafana - id: grafana - name: Grafana - version: 3.1.1 - - type: datasource - id: prometheus - name: Prometheus - version: 1.3.0 - id: - title: Container Metrics (cAdvisor) - description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU - / Memory / Filesystem usage as well as individual pod, containers, systemd services - statistics. Uses cAdvisor metrics only. 
- tags: - - kubernetes - style: dark - timezone: browser - editable: true - hideControls: false - sharedCrosshair: false - rows: - - collapse: false - editable: true - height: 200px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - thresholdLine: false - height: 200px - id: 32 - isNew: true - legend: - alignAsTable: false - avg: true - current: true - max: false - min: false - rightSide: false - show: false - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m])) - interval: 10s - intervalFactor: 1 - legendFormat: Received - metric: network - refId: A - step: 10 - - expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))' - interval: 10s - intervalFactor: 1 - legendFormat: Sent - metric: network - refId: B - step: 10 - timeFrom: - timeShift: - title: Network I/O pressure - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - transparent: false - type: graph - xaxis: - show: true - yaxes: - - format: Bps - label: - logBase: 1 - max: - min: - show: true - - format: Bps - label: - logBase: 1 - max: - min: - show: false - title: Network I/O pressure - - collapse: false - editable: true - height: 250px - panels: - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percent - gauge: - 
maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - height: 180px - id: 4 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) - / sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100 - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: 65, 90 - title: Cluster memory usage - transparent: false - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - height: 180px - id: 6 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) - / sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100 - 
interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: 65, 90 - title: Cluster CPU usage (5m avg) - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - height: 180px - id: 7 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 4 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) - / sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) - * 100 - interval: 10s - intervalFactor: 1 - legendFormat: '' - metric: '' - refId: A - step: 10 - thresholds: 65, 90 - title: Cluster filesystem usage - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: bytes - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: 1px - id: 9 - interval: - isNew: true - links: [] - mappingType: 1 - 
mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 20% - prefix: '' - prefixFontSize: 20% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: '' - title: Used - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: bytes - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: 1px - id: 10 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: '' - title: Total - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: 
"${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: 1px - id: 11 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: " cores" - postfixFontSize: 30% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: '' - title: Used - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: 1px - id: 12 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: " cores" - postfixFontSize: 30% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: '' - title: Total - type: singlestat - 
valueFontSize: 50% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: bytes - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: 1px - id: 13 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: '' - title: Used - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - format: bytes - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - height: 1px - id: 14 - interval: - isNew: true - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 
118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) - interval: 10s - intervalFactor: 1 - refId: A - step: 10 - thresholds: '' - title: Total - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - showTitle: false - title: Total usage - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 3 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '' - id: 17 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (pod) - interval: 10s - intervalFactor: 1 - legendFormat: "{{ pod }}" - metric: container_cpu - refId: A - step: 10 - timeFrom: - timeShift: - title: Pods CPU usage (5m avg) - tooltip: - msResolution: true - shared: true - sort: 2 - value_type: cumulative - transparent: false - type: graph - xaxis: - show: true - yaxes: - - format: none - label: cores - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - showTitle: false - title: Pods CPU usage - - collapse: true - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 3 - editable: true - error: false - 
fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '' - id: 23 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (systemd_service_name) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "{{ systemd_service_name }}" - metric: container_cpu - refId: A - step: 10 - timeFrom: - timeShift: - title: System services CPU usage (5m avg) - tooltip: - msResolution: true - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: none - label: cores - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: System services CPU usage - - collapse: true - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 3 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - height: '' - id: 24 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - hideEmpty: false - hideZero: false - max: false - min: false - rightSide: true - show: true - sideWidth: - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - 
steppedLine: true - targets: - - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container!="POD",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (container, pod) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: 'pod: {{ pod }} | {{ container }}' - metric: container_cpu - refId: A - step: 10 - - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (kubernetes_io_hostname, name, image) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' - metric: container_cpu - refId: B - step: 10 - - expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (kubernetes_io_hostname, rkt_container_name) - interval: 10s - intervalFactor: 1 - legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' - metric: container_cpu - refId: C - step: 10 - timeFrom: - timeShift: - title: Containers CPU usage (5m avg) - tooltip: - msResolution: true - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: none - label: cores - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: Containers CPU usage - - collapse: true - editable: true - height: 500px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 3 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 20 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: false - show: true - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: 
false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (id) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "{{ id }}" - metric: container_cpu - refId: A - step: 10 - timeFrom: - timeShift: - title: All processes CPU usage (5m avg) - tooltip: - msResolution: true - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: none - label: cores - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - showTitle: false - title: All processes CPU usage - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 25 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) - by (pod) - interval: 10s - intervalFactor: 1 - legendFormat: "{{ pod }}" - metric: container_memory_usage:sort_desc - refId: A - step: 10 - timeFrom: - timeShift: - title: Pods memory usage - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: short 
- label: - logBase: 1 - max: - min: - show: false - title: Pods memory usage - - collapse: true - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 26 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}) - by (systemd_service_name) - interval: 10s - intervalFactor: 1 - legendFormat: "{{ systemd_service_name }}" - metric: container_memory_usage:sort_desc - refId: A - step: 10 - timeFrom: - timeShift: - title: System services memory usage - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: System services memory usage - - collapse: true - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 27 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: 
true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container!="POD",kubernetes_io_hostname=~"^$Node$"}) - by (container, pod) - interval: 10s - intervalFactor: 1 - legendFormat: 'pod: {{ pod }} | {{ container }}' - metric: container_memory_usage:sort_desc - refId: A - step: 10 - - expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) - by (kubernetes_io_hostname, name, image) - interval: 10s - intervalFactor: 1 - legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' - metric: container_memory_usage:sort_desc - refId: B - step: 10 - - expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}) - by (kubernetes_io_hostname, rkt_container_name) - interval: 10s - intervalFactor: 1 - legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' - metric: container_memory_usage:sort_desc - refId: C - step: 10 - timeFrom: - timeShift: - title: Containers memory usage - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: Containers memory usage - - collapse: true - editable: true - height: 500px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 0 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 28 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: false - 
show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: true - targets: - - expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"}) - by (id) - interval: 10s - intervalFactor: 1 - legendFormat: "{{ id }}" - metric: container_memory_usage:sort_desc - refId: A - step: 10 - timeFrom: - timeShift: - title: All processes memory usage - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: All processes memory usage - - collapse: false - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 16 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (pod) - interval: 10s - intervalFactor: 1 - legendFormat: "-> {{ pod }}" - metric: network - refId: A - step: 10 - - expr: '- sum (rate 
(container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (pod)' - interval: 10s - intervalFactor: 1 - legendFormat: "<- {{ pod }}" - metric: network - refId: B - step: 10 - timeFrom: - timeShift: - title: Pods network I/O (5m avg) - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: Bps - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: Pods network I/O - - collapse: true - editable: true - height: 250px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 30 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: true - show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (container, pod) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "-> pod: {{ pod }} | {{ container }}" - metric: network - refId: B - step: 10 - - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (container, pod)' - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "<- pod: {{ pod }} | {{ container }}" - metric: network - refId: D - step: 10 - - expr: sum (rate 
(container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (kubernetes_io_hostname, name, image) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name - }})" - metric: network - refId: A - step: 10 - - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (kubernetes_io_hostname, name, image)' - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name - }})" - metric: network - refId: C - step: 10 - - expr: sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (kubernetes_io_hostname, rkt_container_name) - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name - }}" - metric: network - refId: E - step: 10 - - expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (kubernetes_io_hostname, rkt_container_name)' - hide: false - interval: 10s - intervalFactor: 1 - legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name - }}" - metric: network - refId: F - step: 10 - timeFrom: - timeShift: - title: Containers network I/O (5m avg) - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: Bps - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: Containers network I/O - - collapse: true - editable: true - height: 500px - panels: - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 2 - editable: true - error: false - fill: 1 - grid: - threshold1: - threshold1Color: rgba(216, 200, 27, 0.27) - 
threshold2: - threshold2Color: rgba(234, 112, 112, 0.22) - id: 29 - isNew: true - legend: - alignAsTable: true - avg: true - current: true - max: false - min: false - rightSide: false - show: true - sideWidth: 200 - sort: current - sortDesc: true - total: false - values: true - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (id) - interval: 10s - intervalFactor: 1 - legendFormat: "-> {{ id }}" - metric: network - refId: A - step: 10 - - expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) - by (id)' - interval: 10s - intervalFactor: 1 - legendFormat: "<- {{ id }}" - metric: network - refId: B - step: 10 - timeFrom: - timeShift: - title: All processes network I/O (5m avg) - tooltip: - msResolution: false - shared: true - sort: 2 - value_type: cumulative - type: graph - xaxis: - show: true - yaxes: - - format: Bps - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - title: All processes network I/O - time: - from: now-5m - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: ".*" - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: true - multi: false - name: Node - options: [] - query: label_values(kubernetes_io_hostname) - refresh: 1 - type: query - annotations: - list: [] - refresh: 
5m - schemaVersion: 12 - version: 13 - links: [] - gnetId: 315 - rabbitmq: - __inputs: - - name: DS_PROMETHEUS - label: Prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.2.0 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: [] - editable: true - gnetId: 2121 - graphTooltip: 0 - hideControls: false - id: - links: [] - refresh: 5m - rows: - - collapse: false - height: 266 - panels: - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 13 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - targets: - - expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - metric: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - refId: A - step: 2 - thresholds: Up,Down - timeFrom: 30s - title: RabbitMQ Server - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - - op: "=" - text: Down - value: '0' - - op: "=" - text: Up - value: '1' - valueName: current - - alert: - conditions: - - evaluator: - params: - - 1 - type: lt - operator: - type: and - 
query: - params: - - A - - 10s - - now - reducer: - params: [] - type: last - type: query - - evaluator: - params: [] - type: no_value - operator: - type: and - query: - params: - - A - - 10s - - now - reducer: - params: [] - type: last - type: query - executionErrorState: alerting - frequency: 60s - handler: 1 - message: Some of the RabbitMQ node is down - name: Node Stats alert - noDataState: no_data - notifications: [] - aliasColors: {} - bars: true - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 12 - legend: - alignAsTable: true - avg: false - current: true - max: false - min: false - show: true - total: false - values: true - lines: false - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 9 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_running{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}" - metric: rabbitmq_running - refId: A - step: 2 - thresholds: - - colorMode: critical - fill: true - line: true - op: lt - value: 1 - timeFrom: 30s - timeShift: - title: Node up Stats - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 6 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: false - steppedLine: false - targets: - - expr: 
rabbitmq_exchangesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{instance}}:exchanges" - metric: rabbitmq_exchangesTotal - refId: A - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Exchanges - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 4 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_channelsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{instance}}:channels" - metric: rabbitmq_channelsTotal - refId: A - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Channels - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 3 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: false - steppedLine: false - targets: - - expr: 
rabbitmq_consumersTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{instance}}:consumers" - metric: rabbitmq_consumersTotal - refId: A - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Consumers - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 5 - legend: - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_connectionsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{instance}}:connections" - metric: rabbitmq_connectionsTotal - refId: A - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Connections - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 7 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 4 - stack: false - steppedLine: false - targets: - - expr: 
rabbitmq_queuesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{instance}}:queues" - metric: rabbitmq_queuesTotal - refId: A - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Queues - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 8 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum by (vhost)(rabbitmq_queue_messages_ready{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) - intervalFactor: 2 - legendFormat: "{{vhost}}:ready" - metric: rabbitmq_queue_messages_ready - refId: A - step: 2 - - expr: sum by (vhost)(rabbitmq_queue_messages_published_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) - intervalFactor: 2 - legendFormat: "{{vhost}}:published" - metric: rabbitmq_queue_messages_published_total - refId: B - step: 2 - - expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) - intervalFactor: 2 - legendFormat: "{{vhost}}:delivered" - metric: rabbitmq_queue_messages_delivered_total - refId: C - step: 2 - - expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) - intervalFactor: 2 - legendFormat: "{{vhost}}:unack" - metric: ack - refId: D - step: 2 - thresholds: [] - timeFrom: - timeShift: 
- title: Messages/host - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - decimals: 0 - fill: 1 - id: 2 - legend: - alignAsTable: true - avg: false - current: true - max: false - min: false - rightSide: false - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_queue_messages{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{queue}}:{{durable}}" - metric: rabbitmq_queue_messages - refId: A - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Messages / Queue - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 9 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_node_mem_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}:used" - metric: rabbitmq_node_mem_used - refId: A - step: 2 - - expr: 
rabbitmq_node_mem_limit{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}:limit" - metric: node_mem - refId: B - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Memory - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: decbytes - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 10 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_fd_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}:used" - metric: '' - refId: A - step: 2 - - expr: rabbitmq_fd_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}:total" - metric: node_mem - refId: B - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: FIle descriptors - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 11 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: 
flot - seriesOverrides: [] - span: 6 - stack: false - steppedLine: false - targets: - - expr: rabbitmq_sockets_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}:used" - metric: '' - refId: A - step: 2 - - expr: rabbitmq_sockets_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} - intervalFactor: 2 - legendFormat: "{{node}}:total" - metric: '' - refId: B - step: 2 - thresholds: [] - timeFrom: - timeShift: - title: Sockets - tooltip: - shared: true - sort: 0 - value_type: individual - transparent: false - type: graph - xaxis: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - current: {} - hide: 0 - label: null - name: rabbit - options: [] - type: query - query: label_values(rabbitmq_up, release_group) - refresh: 1 - sort: 1 - datasource: "${DS_PROMETHEUS}" - time: - from: now-5m - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: browser - title: RabbitMQ Metrics - version: 17 - description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections, - Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.' 
- kubernetes_capacity_planning: - __inputs: - - name: DS_PROMETHEUS - label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.4.1 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: [] - description: '' - editable: true - gnetId: 22 - graphTooltip: 0 - hideControls: false - id: - links: [] - refresh: false - rows: - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 3 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100 - hide: false - intervalFactor: 10 - legendFormat: '' - refId: A - step: 50 - thresholds: [] - timeFrom: - timeShift: - title: Idle cpu - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: percent - label: cpu usage - logBase: 1 - max: - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 9 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - 
links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(node_load1) - intervalFactor: 4 - legendFormat: load 1m - refId: A - step: 20 - target: '' - - expr: sum(node_load5) - intervalFactor: 4 - legendFormat: load 5m - refId: B - step: 20 - target: '' - - expr: sum(node_load15) - intervalFactor: 4 - legendFormat: load 15m - refId: C - step: 20 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: System load - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: percentunit - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 4 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} - yaxis: 2 - spaceLength: 10 - span: 9 - stack: true - steppedLine: false - targets: - - expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - - sum(node_memory_Cached) - intervalFactor: 2 - legendFormat: memory usage - metric: memo - refId: A - step: 10 - target: '' - - expr: sum(node_memory_Buffers) - interval: '' - intervalFactor: 2 - legendFormat: memory buffers - metric: memo - refId: B - step: 10 
- target: '' - - expr: sum(node_memory_Cached) - interval: '' - intervalFactor: 2 - legendFormat: memory cached - metric: memo - refId: C - step: 10 - target: '' - - expr: sum(node_memory_MemFree) - interval: '' - intervalFactor: 2 - legendFormat: memory free - metric: memo - refId: D - step: 10 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: Memory usage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 5 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" - intervalFactor: 2 - metric: '' - refId: A - step: 60 - target: '' - thresholds: 80, 90 - title: Memory usage - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - 
height: 246 - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 6 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: read - yaxis: 1 - - alias: '{instance="172.17.0.1:9100"}' - yaxis: 2 - - alias: io time - yaxis: 2 - spaceLength: 10 - span: 9 - stack: false - steppedLine: false - targets: - - expr: sum(rate(node_disk_bytes_read[5m])) - hide: false - intervalFactor: 4 - legendFormat: read - refId: A - step: 20 - target: '' - - expr: sum(rate(node_disk_bytes_written[5m])) - intervalFactor: 4 - legendFormat: written - refId: B - step: 20 - - expr: sum(rate(node_disk_io_time_ms[5m])) - intervalFactor: 4 - legendFormat: io time - refId: C - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Disk I/O - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: ms - label: - logBase: 1 - max: - min: - show: true - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percentunit - gauge: - maxValue: 1 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 12 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - 
rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) - / sum(node_filesystem_size{device!="rootfs"}) - intervalFactor: 2 - refId: A - step: 60 - target: '' - thresholds: 0.75, 0.9 - title: Disk space usage - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 8 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: 'transmitted ' - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m])) - hide: false - intervalFactor: 2 - legendFormat: '' - refId: A - step: 10 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: Network received - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 10 - legend: - avg: false - 
current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: 'transmitted ' - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m])) - hide: false - intervalFactor: 2 - legendFormat: '' - refId: B - step: 10 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: Network transmitted - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: bytes - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 276 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 11 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 9 - stack: false - steppedLine: false - targets: - - expr: sum(kube_pod_info) - format: time_series - intervalFactor: 2 - legendFormat: Current number of Pods - refId: A - step: 10 - - expr: sum(kube_node_status_capacity_pods) - format: time_series - intervalFactor: 2 - legendFormat: Maximum capacity of pods - refId: B - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Cluster Pod Utilization - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - 
- format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 7 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) - * 100 - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 60 - target: '' - thresholds: '80,90' - title: Pod Utilization - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - time: - from: now-1h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: browser - title: Kubernetes Capacity Planning - version: 4 - inputs: - - 
name: prometheus - pluginId: prometheus - type: datasource - value: prometheus - overwrite: true - kubernetes_cluster_status: - __inputs: - - name: prometheus - label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.4.1 - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: [] - editable: true - gnetId: - graphTooltip: 0 - hideControls: false - id: - links: [] - rows: - - collapse: false - height: 129 - panels: - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 5 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 6 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Control Plane UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: UP - value: 'null' - valueName: total - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - format: none - gauge: - maxValue: 100 - minValue: 0 - show: 
false - thresholdLabels: false - thresholdMarkers: true - id: 6 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 6 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '3,5' - title: Alerts Firing - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Cluster Health - titleSize: h6 - - collapse: false - height: 168 - panels: - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - decimals: - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 1 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '50,80' - title: API Servers UP 
- type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - decimals: - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 2 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) - * 100 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '50,80' - title: Controller Managers UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: "${DS_PROMETHEUS}" - decimals: - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 3 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 
0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"})) - * 100 - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '50,80' - title: Schedulers UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - hideTimeOverride: false - id: 4 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h]) - > 5) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Crashlooping Control Plane Pods - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Control Plane Status - titleSize: h6 - - collapse: false - height: 158 - panels: - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: 
"${DS_PROMETHEUS}" - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 8 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) - * 100)) / count(node_cpu{job="node-exporter",mode="idle"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '80,90' - title: CPU Utilization - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 7 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" - format: time_series - intervalFactor: 2 - 
legendFormat: '' - refId: A - step: 600 - thresholds: '80,90' - title: Memory Utilization - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 9 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) - / sum(node_filesystem_size{device!="rootfs"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '80,90' - title: Filesystem Utilization - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 10 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A 
- to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) - * 100 - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '80,90' - title: Pod Utilization - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Capacity Planing - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - time: - from: now-6h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: '' - title: Kubernetes Cluster Status - version: 3 - inputs: - - name: prometheus - pluginId: prometheus - type: datasource - value: prometheus - overwrite: true - nodes: - __inputs: - - name: prometheus - label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.4.1 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: [] - description: Dashboard to get an overview of one server - editable: true - gnetId: 22 - graphTooltip: 0 - hideControls: false - id: - links: [] - refresh: false - rows: - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: 
false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 3 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m])) - * 100) - hide: false - intervalFactor: 10 - legendFormat: "{{cpu}}" - refId: A - step: 50 - thresholds: [] - timeFrom: - timeShift: - title: Idle cpu - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: percent - label: cpu usage - logBase: 1 - max: 100 - min: 0 - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 9 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: node_load1{instance="$server"} - intervalFactor: 4 - legendFormat: load 1m - refId: A - step: 20 - target: '' - - expr: node_load5{instance="$server"} - intervalFactor: 4 - legendFormat: load 5m - refId: B - step: 20 - target: '' - - expr: node_load15{instance="$server"} - intervalFactor: 4 - legendFormat: load 15m - refId: C - step: 20 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: System load - tooltip: - 
msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: percentunit - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 4 - legend: - alignAsTable: false - avg: false - current: false - hideEmpty: false - hideZero: false - max: false - min: false - rightSide: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} - yaxis: 2 - spaceLength: 10 - span: 9 - stack: true - steppedLine: false - targets: - - expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"} - hide: false - interval: '' - intervalFactor: 2 - legendFormat: memory used - metric: '' - refId: C - step: 10 - - expr: node_memory_Buffers{instance="$server"} - interval: '' - intervalFactor: 2 - legendFormat: memory buffers - metric: '' - refId: E - step: 10 - - expr: node_memory_Cached{instance="$server"} - intervalFactor: 2 - legendFormat: memory cached - metric: '' - refId: F - step: 10 - - expr: node_memory_MemFree{instance="$server"} - intervalFactor: 2 - legendFormat: memory free - metric: '' - refId: D - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Memory usage - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - 
xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 5 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}) - / node_memory_MemTotal{instance="$server"}) * 100 - intervalFactor: 2 - refId: A - step: 60 - target: '' - thresholds: 80, 90 - title: Memory usage - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 6 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 
- points: false - renderer: flot - seriesOverrides: - - alias: read - yaxis: 1 - - alias: '{instance="172.17.0.1:9100"}' - yaxis: 2 - - alias: io time - yaxis: 2 - spaceLength: 10 - span: 9 - stack: false - steppedLine: false - targets: - - expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m])) - hide: false - intervalFactor: 4 - legendFormat: read - refId: A - step: 20 - target: '' - - expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m])) - intervalFactor: 4 - legendFormat: written - refId: B - step: 20 - - expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m])) - intervalFactor: 4 - legendFormat: io time - refId: C - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Disk I/O - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: ms - label: - logBase: 1 - max: - min: - show: true - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: percentunit - gauge: - maxValue: 1 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 7 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - 
sum(node_filesystem_free{device!="rootfs",instance="$server"})) - / sum(node_filesystem_size{device!="rootfs",instance="$server"}) - intervalFactor: 2 - refId: A - step: 60 - target: '' - thresholds: 0.75, 0.9 - title: Disk space usage - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 250px - panels: - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 8 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: 'transmitted ' - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m]) - hide: false - intervalFactor: 2 - legendFormat: "{{device}}" - refId: A - step: 10 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: Network received - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - alerting: {} - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 10 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - 
points: false - renderer: flot - seriesOverrides: - - alias: 'transmitted ' - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m]) - hide: false - intervalFactor: 2 - legendFormat: "{{device}}" - refId: B - step: 10 - target: '' - thresholds: [] - timeFrom: - timeShift: - title: Network transmitted - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: - show: true - - format: bytes - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: Server - multi: false - name: host - options: [] - query: label_values(node_uname_info, nodename) - refresh: 1 - regex: '' - sort: 0 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 2 - includeAll: false - label: Instance - multi: false - name: server - options: [] - query: label_values(node_uname_info{nodename="$host"}, instance) - refresh: 1 - regex: '' - sort: 0 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - time: - from: now-1h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: browser - title: Nodes - version: 2 - 
inputs: - - name: prometheus - pluginId: prometheus - type: datasource - value: prometheus - overwrite: true - openstack_control_plane: - __inputs: - - name: prometheus - label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.5.2 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - - type: panel - id: text - name: Text - version: '' - annotations: - list: [] - editable: true - gnetId: - graphTooltip: 1 - hideControls: false - id: - links: [] - refresh: 5m - rows: - - collapse: false - height: 250px - panels: - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 24 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=keystone - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_keystone_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - 
groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Keystone - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 23 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=glance - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_glance_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Glance - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - 
op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(202, 58, 40, 0.86) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 22 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=heat - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_heat_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Heat - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - 
thresholdMarkers: true - id: 21 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=neutron - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_neutron_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Neutron - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(208, 53, 34, 0.82) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 20 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=nova - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: 
'' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_nova_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Nova - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 19 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=swift - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_swift_api{job="openstack-metrics", region="$region"} 
- fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Ceph - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 18 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=cinder - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_cinder_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Cinder - type: singlestat - valueFontSize: 
50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 17 - interval: "> 60s" - links: - - dashboard: Openstack Service - name: Drilldown dashboard - params: var-Service=placement - title: Openstack Service - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_placement_api{job="openstack-metrics", region="$region"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Placement - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(208, 53, 34, 0.82) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - 
datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 16 - interval: "> 60s" - links: - - dashboard: RabbitMQ Metrics - name: Drilldown dashboard - title: RabbitMQ Metrics - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: min(rabbitmq_up) - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: RabbitMQ - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(208, 53, 34, 0.82) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 15 - interval: "> 60s" - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - 
rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: min(mysql_global_status_wsrep_ready) - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: MariaDB - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(225, 177, 40, 0.59) - - rgba(208, 53, 34, 0.82) - - rgba(118, 245, 40, 0.73) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 14 - interval: "> 60s" - links: - - dashboard: Nginx Stats - name: Drilldown dashboard - title: Nginx Stats - type: dashboard - mappingType: 2 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: '1' - text: OK - to: '99999999999999' - - from: '0' - text: CRIT - to: '0' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: sum_over_time(nginx_connections_total{type="active", namespace="openstack"}[5m]) - fill: '' - format: time_series - function: last - groupBy: - - params: - 
- "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '0,1' - title: Nginx - type: singlestat - valueFontSize: 50% - valueName: current - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(208, 53, 34, 0.82) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 13 - interval: "> 60s" - links: - - dashboard: Memcached - name: Drilldown dashboard - title: Memcached - type: dashboard - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 1 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: min(memcached_up) - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '1,2' - title: Memcached - type: singlestat - valueFontSize: 50% - valueMaps: - - op: "=" - text: no data - value: 'null' - - op: "=" - text: CRIT - value: '0' - - op: "=" - text: OK - value: '1' - - op: "=" - text: UNKW - value: '2' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: OpenStack Services - titleSize: h6 - - collapse: false - height: 250px - 
panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 11 - interval: "> 60s" - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 3 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - alias: free - column: value - expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} + openstack_total_free_vcpus{job="openstack-metrics", - region="$region"} - format: time_series - function: min - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - - alias: used - column: value - expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} - format: time_series - function: max - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - intervalFactor: 2 - policy: default - rawQuery: false - refId: B - resultFormat: time_series - step: 120 - thresholds: [] - timeFrom: - timeShift: - title: VCPUs (total vs used) - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 12 - interval: "> 60s" - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - 
linewidth: 3 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - alias: free - column: value - expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} + openstack_total_free_ram_MB{job="openstack-metrics", - region="$region"} - format: time_series - function: mean - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - - alias: used - column: value - expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} - format: time_series - function: mean - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: B - resultFormat: time_series - step: 120 - thresholds: [] - timeFrom: - timeShift: - title: RAM (total vs used) - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: mbytes - label: '' - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 13 - interval: "> 60s" - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 3 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - alias: free - column: value - expr: 
openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} + openstack_total_free_disk_GB{job="openstack-metrics", - region="$region"} - format: time_series - function: mean - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - - alias: used - column: value - expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} - format: time_series - function: mean - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - intervalFactor: 2 - policy: default - rawQuery: false - refId: B - resultFormat: time_series - step: 120 - thresholds: [] - timeFrom: - timeShift: - title: Disk (used vs total) - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: gbytes - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes": false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 27 - interval: "> 60s" - legend: - alignAsTable: false - avg: true - current: true - hideEmpty: true - hideZero: false - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 4 - links: [] - nullPointMode: null - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - alias: free - column: value - expr: sum(openstack_running_instances) - format: time_series - function: mean - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - interval: "15s" - intervalFactor: 1 - legendFormat: "{{ running_vms }}" - policy: default - 
rawQuery: false - refID: A - resultFormat: time_series - - alias: used - column: value - expr: sum(openstack_total_running_instances) - format: time_series - function: mean - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - interval: "15s" - intervalFactor: 1 - legendFormat: "{{ total_vms }}" - policy: default - rawQuery: false - refID: B - resultFormat: time_series - step: 120 - thresholds: [] - timeFrom: - timeShift: - title: OpenStack Instances - tooltip: - msResolution: false - shared: true - sort : 0 - value_type: cumulative - transparent: true - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: none - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: false - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Virtual resources - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - enable: true - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: - multi: false - name: region - options: [] - query: label_values(openstack_exporter_cache_refresh_duration_seconds, region) - refresh: 1 - regex: '' - sort: 0 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - time: - from: now-1h - to: now - timepicker: - collapse: false - enable: true - notice: false - now: true - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - status: Stable - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - type: timepicker - timezone: browser - title: OpenStack Metrics - version: 2 - nginx_stats: - __inputs: - - name: prometheus - label: 
prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.5.2 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - annotations: - list: [] - description: Show stats from the hnlq715/nginx-vts-exporter. - editable: true - gnetId: 2949 - graphTooltip: 0 - hideControls: false - id: - links: [] - refresh: 5m - rows: - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 7 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum(nginx_upstream_responses_total{upstream=~"^$Upstream$"}) by (status_code, - upstream) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ status_code }}.{{ upstream }}" - metric: nginx_upstream_response - refId: A - step: 4 - thresholds: [] - timeFrom: - timeShift: - title: HTTP Response Codes by Upstream - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 6 - legend: - avg: false - current: false - max: false - min: false - 
show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(irate(nginx_upstream_requests_total{upstream=~"^$Upstream$"}[5m])) - by (upstream) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ upstream }}" - metric: nginx_upstream_requests - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Upstream Requests rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 5 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(irate(nginx_upstream_bytes_total{upstream=~"^$Upstream$"}[5m])) by - (direction, upstream) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ direction }}.{{ upstream }}" - metric: nginx_upstream_bytes - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Upstream Bytes Transfer rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - 
show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - - collapse: false - height: 250px - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 1 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(irate(nginx_connections_total[5m])) by (type) - format: time_series - intervalFactor: 2 - legendFormat: "{{ type }}" - metric: nginx_server_connections - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Overall Connections rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 4 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(irate(nginx_cache_total{ server_zone=~"$ingress"}[5m])) by (server_zone, - type) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ type }}.{{ server_zone }}" - metric: nginx_server_cache - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Cache Action rate - tooltip: - msResolution: 
false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 3 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(irate(nginx_requests_total{ server_zone=~"$ingress" }[5m])) by (server_zone) - format: time_series - interval: '' - intervalFactor: 2 - legendFormat: "{{ server_zone }}" - metric: nginx_server_requests - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Overall Requests rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - id: 2 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: 
sum(irate(nginx_bytes_total{ server_zone=~"$ingress" }[5m])) by (direction, - server_zone) - format: time_series - intervalFactor: 2 - legendFormat: "{{ direction }}.{{ server_zone }}" - metric: nginx_server_bytes - refId: A - step: 10 - thresholds: [] - timeFrom: - timeShift: - title: Overall Bytes Transferred rate - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - label: - logBase: 1 - max: - min: '0' - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: - - prometheus - - nginx - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: ".*" - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: - multi: true - name: Upstream - options: [] - query: label_values(nginx_upstream_bytes_total, upstream) - refresh: 1 - regex: '' - sort: 1 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - - allValue: - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: false - label: - multi: true - name: ingress - options: [] - query: label_values(nginx_bytes_total, server_zone) - refresh: 1 - regex: "/^[^\\*_]+$/" - sort: 1 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - time: - from: now-1h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: browser - title: Nginx Stats - version: 13 - openstack-service: - __inputs: - - name: prometheus - 
label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.5.2 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - enable: true - list: [] - editable: true - gnetId: - graphTooltip: 1 - hideControls: false - id: - links: [] - refresh: 5m - rows: - - collapse: false - height: 250px - panels: - - cacheTimeout: - colorBackground: true - colorValue: false - colors: - - rgba(225, 177, 40, 0.59) - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 6 - interval: "> 60s" - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - column: value - condition: '' - expr: openstack_check_[[Service]]_api{job="openstack-metrics"} - fill: '' - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - 'null' - type: fill - groupByTags: [] - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - thresholds: '0,1' - title: '' - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: CRITICAL - value: '0' - - op: "=" - text: OK - value: '1' - valueName: current - - cacheTimeout: - 
colorBackground: false - colorValue: false - colors: - - rgba(200, 54, 35, 0.88) - - rgba(118, 245, 40, 0.73) - - rgba(225, 177, 40, 0.59) - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 13 - interval: "> 60s" - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 2 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: true - tableColumn: '' - targets: - - column: value - condition: '' - expr: sum(nginx_responses_total{server_zone=~"[[Service]].*", status_code="5xx"}) - fill: '' - format: time_series - function: count - groupBy: - - interval: auto - params: - - auto - type: time - - params: - - '0' - type: fill - groupby_field: '' - interval: '' - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - step: 120 - tags: [] - thresholds: '' - title: HTTP 5xx errors - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: current - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 0 - grid: {} - id: 7 - interval: ">60s" - legend: - alignAsTable: true - avg: true - current: false - max: true - min: true - show: true - sortDesc: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 8 - stack: false - steppedLine: false - targets: - - expr: 
sum(nginx_upstream_response_msecs_avg{upstream=~"openstack-[[Service]].*"}) - by (upstream) - format: time_series - intervalFactor: 2 - refId: A - step: 120 - thresholds: [] - timeFrom: - timeShift: - title: HTTP response time - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 9 - interval: "> 60s" - legend: - avg: false - current: false - max: false - min: false - show: false - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 4 - stack: false - steppedLine: true - targets: - - alias: healthy - column: value - expr: openstack_check_[[Service]]_api - format: time_series - function: last - groupBy: - - params: - - "$interval" - type: time - - params: - - '0' - type: fill - groupByTags: [] - intervalFactor: 2 - policy: default - rawQuery: false - refId: A - resultFormat: time_series - select: [] - step: 120 - tags: [] - thresholds: [] - timeFrom: - timeShift: - title: API Availability - tooltip: - msResolution: false - shared: false - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: none - label: '' - logBase: 1 - max: 1 - min: 0 - show: false - - format: short - logBase: 1 - max: - min: - show: false - - aliasColors: - '{status_code="2xx"}': "#629E51" - '{status_code="5xx"}': "#BF1B00" - bars: true - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 0 - grid: {} - id: 8 - 
interval: "> 60s" - legend: - alignAsTable: false - avg: false - current: false - hideEmpty: false - max: false - min: false - rightSide: false - show: true - total: false - values: false - lines: false - linewidth: 1 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 8 - stack: true - steppedLine: false - targets: - - expr: sum(nginx_responses_total{server_zone=~"[[Service]].*"}) by (status_code) - format: time_series - intervalFactor: 2 - refId: A - step: 120 - thresholds: [] - timeFrom: - timeShift: - title: Number of HTTP responses - tooltip: - msResolution: false - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: true - title: Service Status - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - enable: true - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: - current: - tags: [] - text: cinder - value: cinder - hide: 0 - includeAll: false - label: - multi: false - name: Service - options: - - selected: false - text: nova - value: nova - - selected: false - text: glance - value: glance - - selected: false - text: keystone - value: keystone - - selected: true - text: cinder - value: cinder - - selected: false - text: heat - value: heat - - selected: false - text: placement - value: placement - - selected: false - text: neutron - value: neutron - query: nova,glance,keystone,cinder,heat,placement,neutron - type: custom - time: - from: now-1h - to: now - timepicker: - collapse: false - enable: true - notice: false - 
now: true - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - status: Stable - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - type: timepicker - timezone: browser - title: Openstack Service - version: 4 - coredns: - __inputs: - - name: prometheus - label: Prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.4.3 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - annotations: - list: [] - editable: true - gnetId: 5926 - graphTooltip: 0 - hideControls: false - id: - links: [] - rows: - - collapse: false - height: 250px - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 1 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: total - yaxis: 2 - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) - by (proto) - format: time_series - intervalFactor: 2 - legendFormat: "{{proto}}" - refId: A - step: 60 - - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) - format: time_series - intervalFactor: 2 - legendFormat: total - refId: B - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: Requests (total) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: pps - logBase: 1 - max: - min: 0 - show: true - - format: pps - logBase: 1 
- max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 12 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: total - yaxis: 2 - - alias: other - yaxis: 2 - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(rate(coredns_dns_request_type_count_total{instance=~"$instance"}[5m])) - by (type) - intervalFactor: 2 - legendFormat: "{{type}}" - refId: A - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: Requests (by qtype) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: pps - logBase: 1 - max: - min: 0 - show: true - - format: pps - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 2 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: total - yaxis: 2 - spaceLength: 10 - span: 4 - stack: false - steppedLine: false - targets: - - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) - by (zone) - intervalFactor: 2 - legendFormat: "{{zone}}" - refId: A - step: 60 - - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) - intervalFactor: 2 - legendFormat: total - refId: B - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: Requests (by zone) 
- tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: pps - logBase: 1 - max: - min: 0 - show: true - - format: pps - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 10 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: total - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(rate(coredns_dns_request_do_count_total{instance=~"$instance"}[5m])) - intervalFactor: 2 - legendFormat: DO - refId: A - step: 40 - - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) - intervalFactor: 2 - legendFormat: total - refId: B - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Requests (DO bit) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: pps - logBase: 1 - max: - min: 0 - show: true - - format: pps - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 9 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: tcp:90 - yaxis: 2 - - alias: 'tcp:99 ' - yaxis: 2 - - alias: tcp:50 - yaxis: 2 - spaceLength: 10 - span: 3 - stack: false - steppedLine: 
false - targets: - - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) - by (le,proto)) - intervalFactor: 2 - legendFormat: "{{proto}}:99 " - refId: A - step: 60 - - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) - by (le,proto)) - intervalFactor: 2 - legendFormat: "{{proto}}:90" - refId: B - step: 60 - - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) - by (le,proto)) - intervalFactor: 2 - legendFormat: "{{proto}}:50" - refId: C - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: Requests (size, udp) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 14 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: tcp:90 - yaxis: 1 - - alias: 'tcp:99 ' - yaxis: 1 - - alias: tcp:50 - yaxis: 1 - spaceLength: 10 - span: 3 - stack: false - steppedLine: false - targets: - - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) - by (le,proto)) - intervalFactor: 2 - legendFormat: "{{proto}}:99 " - refId: A - step: 60 - - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) - by (le,proto)) - intervalFactor: 2 - legendFormat: "{{proto}}:90" 
- refId: B - step: 60 - - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) - by (le,proto)) - intervalFactor: 2 - legendFormat: "{{proto}}:50" - refId: C - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: Requests (size,tcp) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: 0 - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Row - titleSize: h6 - - collapse: false - height: 250px - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 5 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(rate(coredns_dns_response_rcode_count_total{instance=~"$instance"}[5m])) - by (rcode) - intervalFactor: 2 - legendFormat: "{{rcode}}" - refId: A - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Responses (by rcode) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: pps - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 3 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: 
false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) - by (le, job)) - intervalFactor: 2 - legendFormat: 99% - refId: A - step: 40 - - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) - by (le)) - intervalFactor: 2 - legendFormat: 90% - refId: B - step: 40 - - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) - by (le)) - intervalFactor: 2 - legendFormat: 50% - refId: C - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Responses (duration) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: s - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 8 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: udp:50% - yaxis: 1 - - alias: tcp:50% - yaxis: 2 - - alias: tcp:90% - yaxis: 2 - - alias: tcp:99% - yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) - by (le,proto)) ' - intervalFactor: 2 - legendFormat: "{{proto}}:99%" - refId: A - step: 40 - - 
expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance="$instance",proto="udp"}[5m])) - by (le,proto)) ' - intervalFactor: 2 - legendFormat: "{{proto}}:90%" - refId: B - step: 40 - - expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) - by (le,proto)) ' - intervalFactor: 2 - legendFormat: "{{proto}}:50%" - metric: '' - refId: C - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Responses (size, udp) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 13 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: udp:50% - yaxis: 1 - - alias: tcp:50% - yaxis: 1 - - alias: tcp:90% - yaxis: 1 - - alias: tcp:99% - yaxis: 1 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) - by (le,proto)) ' - intervalFactor: 2 - legendFormat: "{{proto}}:99%" - refId: A - step: 40 - - expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) - by (le,proto)) ' - intervalFactor: 2 - legendFormat: "{{proto}}:90%" - refId: B - step: 40 - - expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) - by (le, proto)) ' - 
intervalFactor: 2 - legendFormat: "{{proto}}:50%" - metric: '' - refId: C - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Responses (size, tcp) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: bytes - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: 0 - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - - collapse: false - height: 250px - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 15 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(coredns_cache_size{instance=~"$instance"}) by (type) - intervalFactor: 2 - legendFormat: "{{type}}" - refId: A - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Cache (size) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - logBase: 1 - max: - min: 0 - show: true - - format: short - logBase: 1 - max: - min: 0 - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - editable: true - error: false - fill: 1 - grid: {} - id: 16 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 2 - links: [] - nullPointMode: connected - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: - - alias: misses - 
yaxis: 2 - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum(rate(coredns_cache_hits_total{instance=~"$instance"}[5m])) by (type) - intervalFactor: 2 - legendFormat: hits:{{type}} - refId: A - step: 40 - - expr: sum(rate(coredns_cache_misses_total{instance=~"$instance"}[5m])) by (type) - intervalFactor: 2 - legendFormat: misses - refId: B - step: 40 - thresholds: [] - timeFrom: - timeShift: - title: Cache (hitrate) - tooltip: - shared: true - sort: 0 - value_type: cumulative - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: pps - logBase: 1 - max: - min: 0 - show: true - - format: pps - logBase: 1 - max: - min: 0 - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: New row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: - - dns - - coredns - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - - allValue: ".*" - current: {} - datasource: "${DS_PROMETHEUS}" - hide: 0 - includeAll: true - label: Instance - multi: false - name: instance - options: [] - query: up{job="coredns"} - refresh: 1 - regex: .*instance="(.*?)".* - sort: 0 - tagValuesQuery: '' - tags: [] - tagsQuery: '' - type: query - useTags: false - time: - from: now-3h - to: now - timepicker: - now: true - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: utc - title: CoreDNS - version: 3 - description: A dashboard for the CoreDNS DNS server. 
- Kubernetes_Calico: - __inputs: - - name: prometheus - label: Prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 5.0.0 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - annotations: - list: - - builtIn: 1 - datasource: "-- Grafana --" - enable: true - hide: true - iconColor: rgba(0, 211, 255, 1) - name: Annotations & Alerts - type: dashboard - description: Calico cluster monitoring dashboard - editable: false - gnetId: 3244 - graphTooltip: 0 - id: - links: [] - panels: - - collapsed: false - gridPos: - h: 1 - w: 24 - x: 0 - 'y': 0 - id: 15 - panels: [] - repeat: - title: Felix - type: row - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 1 - id: 1 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_active_local_endpoints - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Active Local Endpoints - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 1 - id: 3 - legend: - alignAsTable: true - avg: 
true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_active_local_policies - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Active Local Policies - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 8 - id: 2 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_active_local_selectors - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Active Local Selectors - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 8 - id: 4 - legend: - alignAsTable: true - 
avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_active_local_tags - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Active Local Tags - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 15 - id: 5 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_cluster_num_host_endpoints - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Cluster Host Endpoints - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 15 - id: 6 - legend: - alignAsTable: true - 
avg: false - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_cluster_num_workload_endpoints - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Cluster Workload Endpoints - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 22 - id: 7 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_cluster_num_hosts - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Clusters Hosts - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 22 - id: 8 - legend: - alignAsTable: 
true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_ipsets_calico - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Active IP Sets - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 29 - id: 9 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_iptables_chains - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Active IP Tables Chains - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 29 - id: 10 - legend: - alignAsTable: true - avg: true 
- current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_ipset_errors - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: IP Set Command Failures - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 36 - id: 11 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_iptables_save_errors - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: IP Tables Save Errors - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 36 - id: 12 - legend: - alignAsTable: true - avg: true - 
current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_iptables_restore_errors - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: IP Tables Restore Errors - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 0 - 'y': 43 - id: 13 - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_resyncs_started - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Felix Resyncing Datastore - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: "${DS_PROMETHEUS}" - fill: 1 - gridPos: - h: 7 - w: 12 - x: 12 - 'y': 43 - id: 14 - legend: - alignAsTable: true - avg: 
true - current: true - max: true - min: true - show: true - total: false - values: true - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - stack: false - steppedLine: false - targets: - - expr: felix_int_dataplane_failures - format: time_series - intervalFactor: 2 - legendFormat: "{{instance}}" - refId: A - step: 20 - thresholds: [] - timeFrom: - timeShift: - title: Dataplane failed updates - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - refresh: 5m - schemaVersion: 16 - style: dark - tags: - - calico - templating: - list: - - current: - text: Prometheus - value: Prometheus - hide: 0 - label: Prometheus datasource - name: DS_PROMETHEUS - options: [] - query: prometheus - refresh: 1 - regex: '' - type: datasource - time: - from: now-1h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: utc - title: Kubernetes Calico - version: 2 + dashboards: {} diff --git a/grafana/values_overrides/calico.yaml b/grafana/values_overrides/calico.yaml new file mode 100644 index 000000000..109b7826b --- /dev/null +++ b/grafana/values_overrides/calico.yaml @@ -0,0 +1,1050 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# the Calico CNI +conf: + dashboards: + calico: + __inputs: + - name: prometheus + label: Prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 5.0.0 + - type: panel + id: graph + name: Graph + version: 
'' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + annotations: + list: + - builtIn: 1 + datasource: "-- Grafana --" + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + description: Calico cluster monitoring dashboard + editable: false + gnetId: 3244 + graphTooltip: 0 + id: + links: [] + panels: + - collapsed: false + gridPos: + h: 1 + w: 24 + x: 0 + 'y': 0 + id: 15 + panels: [] + repeat: + title: Felix + type: row + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 1 + id: 1 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_active_local_endpoints + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Active Local Endpoints + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 1 + id: 3 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: 
false + targets: + - expr: felix_active_local_policies + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Active Local Policies + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 8 + id: 2 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_active_local_selectors + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Active Local Selectors + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 8 + id: 4 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + 
steppedLine: false + targets: + - expr: felix_active_local_tags + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Active Local Tags + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 15 + id: 5 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_cluster_num_host_endpoints + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Cluster Host Endpoints + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 15 + id: 6 + legend: + alignAsTable: true + avg: false + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + 
steppedLine: false + targets: + - expr: felix_cluster_num_workload_endpoints + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Cluster Workload Endpoints + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 22 + id: 7 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_cluster_num_hosts + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Clusters Hosts + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 22 + id: 8 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: 
false + steppedLine: false + targets: + - expr: felix_ipsets_calico + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Active IP Sets + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 29 + id: 9 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_iptables_chains + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Active IP Tables Chains + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 29 + id: 10 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + 
steppedLine: false + targets: + - expr: felix_ipset_errors + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: IP Set Command Failures + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 36 + id: 11 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_iptables_save_errors + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: IP Tables Save Errors + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 36 + id: 12 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + 
steppedLine: false + targets: + - expr: felix_iptables_restore_errors + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: IP Tables Restore Errors + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 0 + 'y': 43 + id: 13 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - expr: felix_resyncs_started + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Felix Resyncing Datastore + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + gridPos: + h: 7 + w: 12 + x: 12 + 'y': 43 + id: 14 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: 
false + steppedLine: false + targets: + - expr: felix_int_dataplane_failures + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}}" + refId: A + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Dataplane failed updates + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + refresh: 5m + schemaVersion: 16 + style: dark + tags: + - calico + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: utc + title: Kubernetes Calico + version: 2 diff --git a/grafana/values_overrides/ceph.yaml b/grafana/values_overrides/ceph.yaml new file mode 100644 index 000000000..b5a4546c4 --- /dev/null +++ b/grafana/values_overrides/ceph.yaml @@ -0,0 +1,2487 @@ +# NOTE(srwilkers): This overrides file provides a reference for dashboards for +# the overall state of ceph clusters, ceph osds in those clusters, and the +# status of ceph pools for those clusters +conf: + dashboards: + ceph_cluster: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: Prometheus.IO + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: graph + name: Graph + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + id: + title: Ceph - Cluster + 
tags: + - ceph + - cluster + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 150px + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 21 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_health_status{application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '1,1' + title: Status + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + - op: "=" + text: HEALTHY + value: '0' + - op: "=" + text: WARNING + value: '1' + - op: "=" + text: CRITICAL + value: '2' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 22 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: 
+ postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '' + title: Pools + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 33 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: 0.025,0.1 + title: Cluster Capacity + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: bytes 
+ gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 34 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: 0.025,0.1 + title: Used Capacity + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percentunit + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 23 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '70,80' + title: Current Utilization + 
transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + title: New row + - collapse: false + editable: true + height: 100px + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 26 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '' + title: OSDs IN + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 40, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 27 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + 
to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '1,1' + title: OSDs OUT + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 28 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '' + title: OSDs UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 40, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 29 
+ interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_up{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '1,1' + title: OSDs DOWN + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 30 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '250,300' + title: Average PGs per OSD + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + title: 
New row + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: + Available: "#EAB839" + Total Capacity: "#447EBC" + Used: "#BF1B00" + total_avail: "#6ED0E0" + total_space: "#7EB26D" + total_used: "#890F02" + bars: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 4 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '300' + id: 1 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 0 + links: [] + minSpan: + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: Total Capacity + fill: 0 + linewidth: 3 + stack: false + span: 4 + stack: true + steppedLine: false + targets: + - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Available + refId: A + step: 60 + - expr: ceph_cluster_total_used_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Used + refId: B + step: 60 + - expr: ceph_cluster_total_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Total Capacity + refId: C + step: 60 + timeFrom: + timeShift: + title: Capacity + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Total Capacity: "#7EB26D" + Used: "#BF1B00" + total_avail: "#6ED0E0" + total_space: "#7EB26D" + total_used: "#890F02" + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + editable: true + error: false + fill: 1 + grid: + 
threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + height: '300' + id: 3 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + minSpan: + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: true + steppedLine: false + targets: + - expr: sum(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Write + refId: A + step: 60 + - expr: sum(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Read + refId: B + step: 60 + timeFrom: + timeShift: + title: IOPS + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: none + label: '' + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '300' + id: 7 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: true + steppedLine: false + targets: + - expr: sum(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Write + refId: A + 
step: 60 + - expr: sum(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Read + refId: B + step: 60 + timeFrom: + timeShift: + title: Throughput + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + repeat: + showTitle: true + title: CLUSTER + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 18 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + stack: false + span: 12 + stack: true + steppedLine: false + targets: + - expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Total + refId: A + step: 60 + timeFrom: + timeShift: + title: Objects in the Cluster + tooltip: + msResolution: false + shared: true + sort: 1 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 19 + 
isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + stack: false + span: 6 + stack: true + steppedLine: false + targets: + - expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Total + refId: A + step: 60 + - expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Active + refId: B + step: 60 + - expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Inconsistent + refId: C + step: 60 + - expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Creating + refId: D + step: 60 + - expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Recovering + refId: E + step: 60 + - expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Down + refId: F + step: 60 + timeFrom: + timeShift: + title: PGs + tooltip: + msResolution: false + shared: true + sort: 1 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 20 + isNew: 
true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + stack: false + span: 6 + stack: true + steppedLine: false + targets: + - expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Degraded + refId: A + step: 60 + - expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Stale + refId: B + step: 60 + - expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Undersized + refId: C + step: 60 + timeFrom: + timeShift: + title: Stuck PGs + tooltip: + msResolution: false + shared: true + sort: 1 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + title: New row + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - current: {} + hide: 0 + label: Cluster + name: ceph_cluster + options: [] + type: query + query: label_values(ceph_health_status, release_group) + refresh: 1 + sort: 2 + datasource: "${DS_PROMETHEUS}" + - auto: true + auto_count: 10 + auto_min: 1m + current: + tags: [] + text: 1m + value: 1m + datasource: + 
hide: 0 + includeAll: false + label: Interval + multi: false + name: interval + options: + - selected: false + text: auto + value: "$__auto_interval" + - selected: true + text: 1m + value: 1m + - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 0 + type: interval + annotations: + list: [] + refresh: 5m + schemaVersion: 12 + version: 26 + links: [] + gnetId: 917 + description: "Ceph Cluster overview.\r\n" + ceph_osd: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: Prometheus.IO + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: graph + name: Graph + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + id: + title: Ceph - OSD + tags: + - ceph + - osd + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 100px + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 40, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 6 + interval: + isNew: true + links: [] + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: 
connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + - from: '0' + text: DOWN + to: '0.99' + - from: '0.99' + text: UP + to: '1' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osd_up{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + timeFrom: + title: Status + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: DOWN + value: '0' + - op: "=" + text: UP + value: '1' + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 40, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 8 + interval: + isNew: true + links: [] + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + - from: '0' + text: OUT + to: '0.99' + - from: '0.99' + text: IN + to: '1' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osd_in{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + timeFrom: + title: Available + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: DOWN + value: '0' + - op: "=" + text: UP + 
value: '1' + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: + isNew: true + links: [] + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + timeFrom: + title: Total OSDs + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: DOWN + value: '0' + - op: "=" + text: UP + value: '1' + - op: "=" + text: N/A + value: 'null' + valueName: current + title: New row + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: 250 + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: 300 + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: true + id: 5 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 
"/^Average.*/" + fill: 0 + stack: false + span: 10 + stack: true + steppedLine: false + targets: + - expr: ceph_osd_numpg{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Number of PGs - {{ $osd }} + refId: A + step: 60 + - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Average Number of PGs in the Cluster + refId: B + step: 60 + timeFrom: + timeShift: + title: PGs + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"})*100 + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '60,80' + timeFrom: + title: Utilization + transparent: false + type: singlestat + 
valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + showTitle: true + title: 'OSD: $osd' + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 2 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: true + steppedLine: false + targets: + - expr: ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Used - {{ osd.$osd }} + metric: ceph_osd_used_bytes + refId: A + step: 60 + - expr: ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} + hide: false + interval: "$interval" + intervalFactor: 1 + legendFormat: Available - {{ $osd }} + metric: ceph_osd_avail_bytes + refId: B + step: 60 + timeFrom: + timeShift: + title: OSD Storage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 5 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 9 + interval: 
"$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: false + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 2 + points: true + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + legendFormat: Available - {{ $osd }} + metric: ceph_osd_avail_bytes + refId: A + step: 60 + timeFrom: + timeShift: + title: Utilization Variance + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: + logBase: 1 + max: + min: + show: true + - format: none + label: + logBase: 1 + max: + min: + show: true + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - current: {} + hide: 0 + label: Cluster + name: ceph_cluster + options: [] + type: query + query: label_values(ceph_health_status, release_group) + refresh: 1 + sort: 2 + datasource: "${DS_PROMETHEUS}" + - auto: true + auto_count: 10 + auto_min: 1m + current: + selected: true + text: 1m + value: 1m + datasource: + hide: 0 + includeAll: false + label: Interval + multi: false + name: interval + options: + - selected: false + text: auto + value: "$__auto_interval" + - selected: true + text: 1m + value: 1m + - selected: false + text: 10m 
+ value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 0 + type: interval + - current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: OSD + multi: false + name: osd + options: [] + query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, ceph_daemon) + refresh: 1 + regex: '' + type: query + annotations: + list: [] + refresh: 15m + schemaVersion: 12 + version: 18 + links: [] + gnetId: 923 + description: CEPH OSD Status. + ceph_pool: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: Prometheus.IO + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: graph + name: Graph + version: '' + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + id: + title: Ceph - Pools + tags: + - ceph + - pools + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 4 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 2 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 0 + links: [] + 
nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + fill: 0 + linewidth: 4 + stack: false + - alias: "/^Raw.*$/" + color: "#BF1B00" + fill: 0 + linewidth: 4 + span: 10 + stack: true + steppedLine: false + targets: + - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Total - {{ $pool }} + refId: A + step: 60 + - expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Used - {{ $pool }} + refId: B + step: 60 + - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Available - {{ $pool }} + refId: C + step: 60 + - expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Raw - {{ $pool }} + refId: D + step: 60 + timeFrom: + timeShift: + title: "[[pool_name]] Pool Storage" + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: percentunit + gauge: + maxValue: 1 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + 
- name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} / ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}) + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '' + title: "[[pool_name]] Pool Usage" + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + showTitle: true + title: 'Pool: $pool' + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 7 + isNew: true + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Objects - {{ $pool_name }} + refId: A + step: 60 + - expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Dirty Objects - {{ $pool_name }} + refId: B + step: 60 + timeFrom: + timeShift: + title: Objects in Pool [[pool_name]] + tooltip: + msResolution: false + shared: true + 
sort: 0 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + id: 4 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: true + steppedLine: false + targets: + - expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Read - {{ $pool_name }} + refId: B + step: 60 + - expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Write - {{ $pool_name }} + refId: A + step: 60 + timeFrom: + timeShift: + title: "[[pool_name]] Pool IOPS" + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: none + label: IOPS + logBase: 1 + max: + min: 0 + show: true + - format: short + label: IOPS + logBase: 1 + max: + min: 0 + show: false + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 5 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: 
true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: true + steppedLine: false + targets: + - expr: irate(ceph_pool_rd_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Read Bytes - {{ $pool_name }} + refId: A + step: 60 + - expr: irate(ceph_pool_wr_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Written Bytes - {{ $pool_name }} + refId: B + step: 60 + timeFrom: + timeShift: + title: "[[pool_name]] Pool Throughput" + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: 0 + show: true + - format: Bps + label: + logBase: 1 + max: + min: 0 + show: true + title: New row + time: + from: now-3h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - current: {} + hide: 0 + label: Cluster + name: ceph_cluster + options: [] + type: query + query: label_values(ceph_health_status, release_group) + refresh: 1 + sort: 2 + datasource: "${DS_PROMETHEUS}" + - auto: true + auto_count: 10 + auto_min: 1m + current: + selected: true + text: 1m + value: 1m + datasource: + hide: 0 + includeAll: false + label: Interval + multi: false + name: interval + options: + - selected: false + text: auto + value: "$__auto_interval" + - selected: true + text: 1m + value: 1m 
+ - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 0 + type: interval + - current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: Pool + multi: false + name: pool + options: [] + query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id) + refresh: 1 + regex: '' + type: query + - current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: Pool + multi: false + name: pool_name + options: [] + query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name) + refresh: 1 + regex: '' + type: query + annotations: + list: [] + refresh: 5m + schemaVersion: 12 + version: 22 + links: [] + gnetId: 926 + description: Ceph Pools dashboard. 
diff --git a/grafana/values_overrides/containers.yaml b/grafana/values_overrides/containers.yaml new file mode 100644 index 000000000..c2b019f2c --- /dev/null +++ b/grafana/values_overrides/containers.yaml @@ -0,0 +1,1700 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# container metrics, specific to each host +conf: + dashboards: + containers: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: graph + name: Graph + version: '' + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.3.0 + id: + title: Container Metrics (cAdvisor) + description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU + / Memory / Filesystem usage as well as individual pod, containers, systemd services + statistics. Uses cAdvisor metrics only. 
+ tags: + - kubernetes + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 200px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + height: 200px + id: 32 + isNew: true + legend: + alignAsTable: false + avg: true + current: true + max: false + min: false + rightSide: false + show: false + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m])) + interval: 10s + intervalFactor: 1 + legendFormat: Received + metric: network + refId: A + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))' + interval: 10s + intervalFactor: 1 + legendFormat: Sent + metric: network + refId: B + step: 10 + timeFrom: + timeShift: + title: Network I/O pressure + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: Bps + label: + logBase: 1 + max: + min: + show: false + title: Network I/O pressure + - collapse: false + editable: true + height: 250px + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percent + gauge: + 
maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + height: 180px + id: 4 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) + / sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100 + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: 65, 90 + title: Cluster memory usage + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + height: 180px + id: 6 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + / sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100 + 
interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: 65, 90 + title: Cluster CPU usage (5m avg) + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + height: 180px + id: 7 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + / sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + * 100 + interval: 10s + intervalFactor: 1 + legendFormat: '' + metric: '' + refId: A + step: 10 + thresholds: 65, 90 + title: Cluster filesystem usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 9 + interval: + isNew: true + links: [] + mappingType: 1 + 
mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 20% + prefix: '' + prefixFontSize: 20% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Used + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 10 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Total + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: 
"${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 11 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: " cores" + postfixFontSize: 30% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Used + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 12 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: " cores" + postfixFontSize: 30% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Total + type: singlestat + 
valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 13 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Used + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 14 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 
118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Total + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + showTitle: false + title: Total usage + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 17 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (pod) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ pod }}" + metric: container_cpu + refId: A + step: 10 + timeFrom: + timeShift: + title: Pods CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + transparent: false + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + showTitle: false + title: Pods CPU usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 3 + editable: true + error: false + 
fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 23 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (systemd_service_name) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "{{ systemd_service_name }}" + metric: container_cpu + refId: A + step: 10 + timeFrom: + timeShift: + title: System services CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: System services CPU usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 24 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: false + min: false + rightSide: true + show: true + sideWidth: + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + 
steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container!="POD",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (container, pod) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: 'pod: {{ pod }} | {{ container }}' + metric: container_cpu + refId: A + step: 10 + - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, name, image) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' + metric: container_cpu + refId: B + step: 10 + - expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, rkt_container_name) + interval: 10s + intervalFactor: 1 + legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' + metric: container_cpu + refId: C + step: 10 + timeFrom: + timeShift: + title: Containers CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Containers CPU usage + - collapse: true + editable: true + height: 500px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 20 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: false + show: true + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: 
false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (id) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "{{ id }}" + metric: container_cpu + refId: A + step: 10 + timeFrom: + timeShift: + title: All processes CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + showTitle: false + title: All processes CPU usage + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 25 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) + by (pod) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ pod }}" + metric: container_memory_usage:sort_desc + refId: A + step: 10 + timeFrom: + timeShift: + title: Pods memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short 
+ label: + logBase: 1 + max: + min: + show: false + title: Pods memory usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 26 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}) + by (systemd_service_name) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ systemd_service_name }}" + metric: container_memory_usage:sort_desc + refId: A + step: 10 + timeFrom: + timeShift: + title: System services memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: System services memory usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 27 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: 
true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container!="POD",kubernetes_io_hostname=~"^$Node$"}) + by (container, pod) + interval: 10s + intervalFactor: 1 + legendFormat: 'pod: {{ pod }} | {{ container }}' + metric: container_memory_usage:sort_desc + refId: A + step: 10 + - expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) + by (kubernetes_io_hostname, name, image) + interval: 10s + intervalFactor: 1 + legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' + metric: container_memory_usage:sort_desc + refId: B + step: 10 + - expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}) + by (kubernetes_io_hostname, rkt_container_name) + interval: 10s + intervalFactor: 1 + legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' + metric: container_memory_usage:sort_desc + refId: C + step: 10 + timeFrom: + timeShift: + title: Containers memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Containers memory usage + - collapse: true + editable: true + height: 500px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 28 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: false + 
show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"}) + by (id) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ id }}" + metric: container_memory_usage:sort_desc + refId: A + step: 10 + timeFrom: + timeShift: + title: All processes memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: All processes memory usage + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 16 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (pod) + interval: 10s + intervalFactor: 1 + legendFormat: "-> {{ pod }}" + metric: network + refId: A + step: 10 + - expr: '- sum (rate 
(container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (pod)' + interval: 10s + intervalFactor: 1 + legendFormat: "<- {{ pod }}" + metric: network + refId: B + step: 10 + timeFrom: + timeShift: + title: Pods network I/O (5m avg) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Pods network I/O + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 30 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (container, pod) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "-> pod: {{ pod }} | {{ container }}" + metric: network + refId: B + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (container, pod)' + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "<- pod: {{ pod }} | {{ container }}" + metric: network + refId: D + step: 10 + - expr: sum (rate 
(container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, name, image) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name + }})" + metric: network + refId: A + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, name, image)' + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name + }})" + metric: network + refId: C + step: 10 + - expr: sum (rate (container_network_receive_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, rkt_container_name) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name + }}" + metric: network + refId: E + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, rkt_container_name)' + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name + }}" + metric: network + refId: F + step: 10 + timeFrom: + timeShift: + title: Containers network I/O (5m avg) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Containers network I/O + - collapse: true + editable: true + height: 500px + panels: + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) +
threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 29 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: false + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (id) + interval: 10s + intervalFactor: 1 + legendFormat: "-> {{ id }}" + metric: network + refId: A + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (id)' + interval: 10s + intervalFactor: 1 + legendFormat: "<- {{ id }}" + metric: network + refId: B + step: 10 + timeFrom: + timeShift: + title: All processes network I/O (5m avg) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: All processes network I/O + time: + from: now-5m + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: ".*" + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: true + multi: false + name: Node + options: [] + query: label_values(kubernetes_io_hostname) + refresh: 1 + type: query + annotations: + list: [] + refresh: 
5m + schemaVersion: 12 + version: 13 + links: [] + gnetId: 315 diff --git a/grafana/values_overrides/coredns.yaml b/grafana/values_overrides/coredns.yaml new file mode 100644 index 000000000..009b6f806 --- /dev/null +++ b/grafana/values_overrides/coredns.yaml @@ -0,0 +1,1016 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# CoreDNS +conf: + dashboards: + coredns: + __inputs: + - name: prometheus + label: Prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.3 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + annotations: + list: [] + editable: true + gnetId: 5926 + graphTooltip: 0 + hideControls: false + id: + links: [] + rows: + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 1 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: total + yaxis: 2 + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) + by (proto) + format: time_series + intervalFactor: 2 + legendFormat: "{{proto}}" + refId: A + step: 60 + - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) + format: time_series + intervalFactor: 2 + legendFormat: total + refId: B + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Requests (total) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true 
+ values: [] + yaxes: + - format: pps + logBase: 1 + max: + min: 0 + show: true + - format: pps + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 12 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: total + yaxis: 2 + - alias: other + yaxis: 2 + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(rate(coredns_dns_request_type_count_total{instance=~"$instance"}[5m])) + by (type) + intervalFactor: 2 + legendFormat: "{{type}}" + refId: A + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Requests (by qtype) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: pps + logBase: 1 + max: + min: 0 + show: true + - format: pps + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 2 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: total + yaxis: 2 + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) + by (zone) + intervalFactor: 2 + legendFormat: "{{zone}}" + refId: A + step: 60 + - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) + intervalFactor: 2 + 
legendFormat: total + refId: B + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Requests (by zone) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: pps + logBase: 1 + max: + min: 0 + show: true + - format: pps + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 10 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: total + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(coredns_dns_request_do_count_total{instance=~"$instance"}[5m])) + intervalFactor: 2 + legendFormat: DO + refId: A + step: 40 + - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) + intervalFactor: 2 + legendFormat: total + refId: B + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Requests (DO bit) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: pps + logBase: 1 + max: + min: 0 + show: true + - format: pps + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 9 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: tcp:90 + yaxis: 2 + - 
alias: 'tcp:99 ' + yaxis: 2 + - alias: tcp:50 + yaxis: 2 + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) + by (le,proto)) + intervalFactor: 2 + legendFormat: "{{proto}}:99 " + refId: A + step: 60 + - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) + by (le,proto)) + intervalFactor: 2 + legendFormat: "{{proto}}:90" + refId: B + step: 60 + - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) + by (le,proto)) + intervalFactor: 2 + legendFormat: "{{proto}}:50" + refId: C + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Requests (size, udp) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 14 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: tcp:90 + yaxis: 1 + - alias: 'tcp:99 ' + yaxis: 1 + - alias: tcp:50 + yaxis: 1 + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) + by (le,proto)) + intervalFactor: 2 + legendFormat: "{{proto}}:99 " + refId: A + step: 60 + - expr: histogram_quantile(0.90, 
sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) + by (le,proto)) + intervalFactor: 2 + legendFormat: "{{proto}}:90" + refId: B + step: 60 + - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) + by (le,proto)) + intervalFactor: 2 + legendFormat: "{{proto}}:50" + refId: C + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Requests (size,tcp) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: 0 + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Row + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 5 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(coredns_dns_response_rcode_count_total{instance=~"$instance"}[5m])) + by (rcode) + intervalFactor: 2 + legendFormat: "{{rcode}}" + refId: A + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Responses (by rcode) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: pps + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: 
true + error: false + fill: 1 + grid: {} + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) + by (le, job)) + intervalFactor: 2 + legendFormat: 99% + refId: A + step: 40 + - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) + by (le)) + intervalFactor: 2 + legendFormat: 90% + refId: B + step: 40 + - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) + by (le)) + intervalFactor: 2 + legendFormat: 50% + refId: C + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Responses (duration) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: udp:50% + yaxis: 1 + - alias: tcp:50% + yaxis: 2 + - alias: tcp:90% + yaxis: 2 + - alias: tcp:99% + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: 'histogram_quantile(0.99, 
sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) + by (le,proto)) ' + intervalFactor: 2 + legendFormat: "{{proto}}:99%" + refId: A + step: 40 + - expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) + by (le,proto)) ' + intervalFactor: 2 + legendFormat: "{{proto}}:90%" + refId: B + step: 40 + - expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) + by (le,proto)) ' + intervalFactor: 2 + legendFormat: "{{proto}}:50%" + metric: '' + refId: C + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Responses (size, udp) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 13 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: udp:50% + yaxis: 1 + - alias: tcp:50% + yaxis: 1 + - alias: tcp:90% + yaxis: 1 + - alias: tcp:99% + yaxis: 1 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) + by (le,proto)) ' + intervalFactor: 2 + legendFormat: "{{proto}}:99%" + refId: A + step: 40 + - expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) + by (le,proto)) ' + intervalFactor: 2 + legendFormat: 
"{{proto}}:90%" + refId: B + step: 40 + - expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) + by (le, proto)) ' + intervalFactor: 2 + legendFormat: "{{proto}}:50%" + metric: '' + refId: C + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Responses (size, tcp) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: 0 + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 15 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(coredns_cache_size{instance=~"$instance"}) by (type) + intervalFactor: 2 + legendFormat: "{{type}}" + refId: A + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Cache (size) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 16 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: 
false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: misses + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(coredns_cache_hits_total{instance=~"$instance"}[5m])) by (type) + intervalFactor: 2 + legendFormat: hits:{{type}} + refId: A + step: 40 + - expr: sum(rate(coredns_cache_misses_total{instance=~"$instance"}[5m])) by (type) + intervalFactor: 2 + legendFormat: misses + refId: B + step: 40 + thresholds: [] + timeFrom: + timeShift: + title: Cache (hitrate) + tooltip: + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: pps + logBase: 1 + max: + min: 0 + show: true + - format: pps + logBase: 1 + max: + min: 0 + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: + - dns + - coredns + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: ".*" + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: true + label: Instance + multi: false + name: instance + options: [] + query: up{job="coredns"} + refresh: 1 + regex: .*instance="(.*?)".* + sort: 0 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + time: + from: now-3h + to: now + timepicker: + now: true + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: utc + title: CoreDNS + version: 3 + description: A dashboard for the CoreDNS DNS server. 
diff --git a/grafana/values_overrides/elasticsearch.yaml b/grafana/values_overrides/elasticsearch.yaml new file mode 100644 index 000000000..8c1c31022 --- /dev/null +++ b/grafana/values_overrides/elasticsearch.yaml @@ -0,0 +1,2631 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# an Elasticsearch cluster +conf: + dashboards: + elasticsearch: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.6.3 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: + - builtIn: 1 + datasource: "-- Grafana --" + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + editable: true + gnetId: 4358 + graphTooltip: 1 + hideControls: false + id: + links: [] + refresh: 5m + rows: + - collapse: false + height: + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(178, 49, 13, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 8 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 5 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: 
(sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="green"})*2)+sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="yellow"}) + format: time_series + intervalFactor: 3 + legendFormat: '' + metric: '' + refId: A + step: 40 + thresholds: '0,1,2' + title: Cluster health status + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: GREEN + value: '2' + - op: "=" + text: YELLOW + value: '1' + - op: "=" + text: RED + value: '0' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 10 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"}) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + metric: '' + refId: A + step: 40 + thresholds: '' + title: Nodes + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + 
thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 9 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_number_of_data_nodes{cluster="$cluster"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + metric: '' + refId: A + step: 40 + thresholds: '' + title: Data nodes + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + hideTimeOverride: true + id: 16 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_number_of_pending_tasks{cluster="$cluster"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + metric: '' + refId: A + step: 40 + thresholds: '' + title: Pending tasks + transparent: 
false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Cluster + titleSize: h6 + - collapse: false + height: '' + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 11 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + minSpan: 2 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + repeat: shard_type + span: 2.4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_active_primary_shards{cluster="$cluster"} + intervalFactor: 2 + legendFormat: '' + refId: A + step: 40 + thresholds: '' + title: active primary shards + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 39 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + minSpan: 2 + nullPointMode: connected + nullText: + postfix: 
'' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2.4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_active_shards{cluster="$cluster"} + intervalFactor: 2 + legendFormat: '' + refId: A + step: 40 + thresholds: '' + title: active shards + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 40 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + minSpan: 2 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2.4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_initializing_shards{cluster="$cluster"} + intervalFactor: 2 + legendFormat: '' + refId: A + step: 40 + thresholds: '' + title: initializing shards + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + 
thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 41 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + minSpan: 2 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2.4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_relocating_shards{cluster="$cluster"} + intervalFactor: 2 + legendFormat: '' + refId: A + step: 40 + thresholds: '' + title: relocating shards + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: '50' + id: 42 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + minSpan: 2 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2.4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: elasticsearch_cluster_health_unassigned_shards{cluster="$cluster"} + intervalFactor: 2 + legendFormat: '' + refId: A + step: 40 + thresholds: '' + title: unassigned shards + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + 
repeatIteration: + repeatRowId: + showTitle: true + title: Shards + titleSize: h6 + - collapse: false + height: + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 30 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_master_node="true",name=~"$node"} + format: time_series + instant: false + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }} - master" + metric: '' + refId: A + step: 10 + - expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_data_node="true",name=~"$node"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }} - data" + metric: '' + refId: B + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: CPU usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percent + label: CPU usage + logBase: 1 + max: 100 + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 0 + grid: {} + height: '400' + id: 31 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + sortDesc: true + total: false + values: true + 
lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: elasticsearch_jvm_memory_used_bytes{cluster="$cluster",name=~"$node",name=~"$node"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }} - used: {{area}}" + metric: '' + refId: A + step: 10 + - expr: elasticsearch_jvm_memory_committed_bytes{cluster="$cluster",name=~"$node",name=~"$node"} + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - committed: {{area}}" + refId: B + step: 10 + - expr: elasticsearch_jvm_memory_max_bytes{cluster="$cluster",name=~"$node",name=~"$node"} + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - max: {{area}}" + refId: C + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: JVM memory usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: Memory + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 32 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: 
1-(elasticsearch_filesystem_data_available_bytes{cluster="$cluster"}/elasticsearch_filesystem_data_size_bytes{cluster="$cluster",name=~"$node"}) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }} - {{path}}" + metric: '' + refId: A + step: 10 + thresholds: + - colorMode: custom + fill: true + fillColor: rgba(216, 200, 27, 0.27) + op: gt + value: 0.8 + - colorMode: custom + fill: true + fillColor: rgba(234, 112, 112, 0.22) + op: gt + value: 0.9 + timeFrom: + timeShift: + title: Disk usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percentunit + label: Disk Usage % + logBase: 1 + max: 1 + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 47 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + sort: max + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: sent + transform: negative-Y + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: irate(elasticsearch_transport_tx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} -sent" + refId: D + step: 10 + - expr: irate(elasticsearch_transport_rx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} -received" + refId: C + step: 10 + thresholds: [] + timeFrom: + timeShift: + 
title: Network usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: Bps + label: Bytes/sec + logBase: 1 + max: + min: + show: true + - format: pps + label: '' + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: System + titleSize: h6 + - collapse: false + height: '' + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 1 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: elasticsearch_indices_docs{cluster="$cluster",name=~"$node"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Documents count + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: Documents + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 24 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: 
false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Documents indexed rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: index calls/s + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 25 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: rate(elasticsearch_indices_docs_deleted{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Documents deleted rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + 
yaxes: + - format: short + label: Documents/s + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 26 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: rate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Documents merged rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: Documents/s + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Documents + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 48 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + sort: avg + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + 
pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }} - indexing" + metric: '' + refId: A + step: 4 + - expr: irate(elasticsearch_indices_search_query_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - query" + refId: B + step: 4 + - expr: irate(elasticsearch_indices_search_fetch_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - fetch" + refId: C + step: 4 + - expr: irate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - merges" + refId: D + step: 4 + - expr: irate(elasticsearch_indices_refresh_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - refresh" + refId: E + step: 4 + - expr: irate(elasticsearch_indices_flush_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - flush" + refId: F + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: Total Operations rate + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: Operations/s + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 49 + legend: + alignAsTable: 
true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + sort: avg + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ name }} - indexing" + metric: '' + refId: A + step: 4 + - expr: irate(elasticsearch_indices_search_query_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - query" + refId: B + step: 4 + - expr: irate(elasticsearch_indices_search_fetch_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - fetch" + refId: C + step: 4 + - expr: irate(elasticsearch_indices_merges_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - merges" + refId: D + step: 4 + - expr: irate(elasticsearch_indices_refresh_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - refresh" + refId: E + step: 4 + - expr: irate(elasticsearch_indices_flush_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{ name }} - flush" + refId: F + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: Total Operations time + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: 
ms + label: Time + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Total Operations stats + titleSize: h6 + - collapse: false + height: '' + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 33 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: 'rate(elasticsearch_indices_search_query_time_seconds{cluster="$cluster",name=~"$node"}[$interval]) ' + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: Query time + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: ms + label: Time + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 5 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + 
seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: Indexing time + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: ms + label: Time + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 3 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: rate(elasticsearch_indices_merges_total_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: Merging time + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: Time + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + 
showTitle: true + title: Times + titleSize: h6 + - collapse: false + height: + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 4 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: elasticsearch_indices_fielddata_memory_size_bytes{cluster="$cluster",name=~"$node"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Field data memory size + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: Memory + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 34 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: rate(elasticsearch_indices_fielddata_evictions{cluster="$cluster",name=~"$node"}[$interval]) 
+ format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Field data evictions + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: Evictions/s + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 35 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: elasticsearch_indices_query_cache_memory_size_bytes{cluster="$cluster",name=~"$node"} + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Query cache size + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: Size + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 36 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false 
+ hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: rate(elasticsearch_indices_query_cache_evictions{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}}" + metric: '' + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Query cache evictions + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: Evictions/s + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Caches + titleSize: h6 + - collapse: false + height: 728 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 45 + legend: + alignAsTable: true + avg: true + current: false + max: true + min: true + show: true + sort: avg + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: ' irate(elasticsearch_thread_pool_rejected_count{cluster="$cluster",name=~"$node"}[$interval])' + format: time_series + intervalFactor: 2 + legendFormat: "{{name}} - {{ type }}" + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Thread Pool operations rejected + tooltip: + msResolution: false + shared: true + 
sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 46 + legend: + alignAsTable: true + avg: true + current: false + max: true + min: true + show: true + sort: avg + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"} + format: time_series + intervalFactor: 2 + legendFormat: "{{name}} - {{ type }}" + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Thread Pool operations queued + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + height: '' + id: 43 + legend: + alignAsTable: true + avg: true + current: false + max: true + min: true + show: true + sort: avg + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: 
elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"} + format: time_series + intervalFactor: 2 + legendFormat: "{{name}} - {{ type }}" + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Thread Pool threads active + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 44 + legend: + alignAsTable: true + avg: true + current: false + max: true + min: true + show: true + sort: avg + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: irate(elasticsearch_thread_pool_completed_count{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + intervalFactor: 2 + legendFormat: "{{name}} - {{ type }}" + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Thread Pool operations completed + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Thread Pool + titleSize: h6 + - collapse: false + height: + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + 
height: '400' + id: 7 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: true + steppedLine: false + targets: + - expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}} - {{gc}}" + metric: '' + refId: A + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: GC count + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: GCs + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + height: '400' + id: 27 + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: true + min: true + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval]) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{name}} - {{gc}}" + metric: '' + refId: A + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: GC time + tooltip: + msResolution: false + shared: 
true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: Time + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: JVM Garbage Collection + titleSize: h6 + schemaVersion: 14 + style: dark + tags: + - elasticsearch + - App + templating: + list: + - auto: true + auto_count: 30 + auto_min: 10s + current: + text: auto + value: "$__auto_interval" + hide: 0 + label: Interval + name: interval + options: + - selected: true + text: auto + value: "$__auto_interval" + - selected: false + text: 1m + value: 1m + - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 2 + type: interval + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: Instance + multi: false + name: cluster + options: [] + query: label_values(elasticsearch_cluster_health_status,cluster) + refresh: 1 + regex: '' + sort: 1 + tagValuesQuery: + tags: [] + tagsQuery: + type: query + useTags: false + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: true + label: node + multi: true + name: node + options: [] + query: label_values(elasticsearch_process_cpu_percent,name) + refresh: 1 + regex: '' + sort: 1 + tagValuesQuery: 
+ tags: [] + tagsQuery: + type: query + useTags: false + time: + from: now-12h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Elasticsearch + version: 1 + description: Elasticsearch detailed dashboard diff --git a/grafana/values_overrides/kubernetes.yaml b/grafana/values_overrides/kubernetes.yaml new file mode 100644 index 000000000..b9b35e34a --- /dev/null +++ b/grafana/values_overrides/kubernetes.yaml @@ -0,0 +1,1561 @@ +# NOTE(srwilkers): This overrides file provides a reference for dashboards that +# reflect the overall state of a Kubernetes deployment +conf: + dashboards: + kubernetes_capacity_planning: + __inputs: + - name: DS_PROMETHEUS + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + description: '' + editable: true + gnetId: 22 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: false + rows: + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100 + 
hide: false + intervalFactor: 10 + legendFormat: '' + refId: A + step: 50 + thresholds: [] + timeFrom: + timeShift: + title: Idle cpu + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percent + label: cpu usage + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 9 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(node_load1) + intervalFactor: 4 + legendFormat: load 1m + refId: A + step: 20 + target: '' + - expr: sum(node_load5) + intervalFactor: 4 + legendFormat: load 5m + refId: B + step: 20 + target: '' + - expr: sum(node_load15) + intervalFactor: 4 + legendFormat: load 15m + refId: C + step: 20 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: System load + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percentunit + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 4 + legend: + 
avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} + yaxis: 2 + spaceLength: 10 + span: 9 + stack: true + steppedLine: false + targets: + - expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) + - sum(node_memory_Cached) + intervalFactor: 2 + legendFormat: memory usage + metric: memo + refId: A + step: 10 + target: '' + - expr: sum(node_memory_Buffers) + interval: '' + intervalFactor: 2 + legendFormat: memory buffers + metric: memo + refId: B + step: 10 + target: '' + - expr: sum(node_memory_Cached) + interval: '' + intervalFactor: 2 + legendFormat: memory cached + metric: memo + refId: C + step: 10 + target: '' + - expr: sum(node_memory_MemFree) + interval: '' + intervalFactor: 2 + legendFormat: memory free + metric: memo + refId: D + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Memory usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + 
postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) + - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" + intervalFactor: 2 + metric: '' + refId: A + step: 60 + target: '' + thresholds: 80, 90 + title: Memory usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 246 + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 6 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: read + yaxis: 1 + - alias: '{instance="172.17.0.1:9100"}' + yaxis: 2 + - alias: io time + yaxis: 2 + spaceLength: 10 + span: 9 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_disk_bytes_read[5m])) + hide: false + intervalFactor: 4 + legendFormat: read + refId: A + step: 20 + target: '' + - expr: sum(rate(node_disk_bytes_written[5m])) + intervalFactor: 4 + legendFormat: written + refId: B + step: 20 + - expr: sum(rate(node_disk_io_time_ms[5m])) + intervalFactor: 4 + legendFormat: io time + refId: C + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Disk I/O + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + 
values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: ms + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percentunit + gauge: + maxValue: 1 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 12 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) + / sum(node_filesystem_size{device!="rootfs"}) + intervalFactor: 2 + refId: A + step: 60 + target: '' + thresholds: 0.75, 0.9 + title: Disk space usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: 
false + steppedLine: false + targets: + - expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m])) + hide: false + intervalFactor: 2 + legendFormat: '' + refId: A + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network received + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 10 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m])) + hide: false + intervalFactor: 2 + legendFormat: '' + refId: B + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network transmitted + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 276 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 11 + legend: + avg: false + current: false + max: false + min: false + show: true + 
total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 9 + stack: false + steppedLine: false + targets: + - expr: sum(kube_pod_info) + format: time_series + intervalFactor: 2 + legendFormat: Current number of Pods + refId: A + step: 10 + - expr: sum(kube_node_status_capacity_pods) + format: time_series + intervalFactor: 2 + legendFormat: Maximum capacity of pods + refId: B + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Cluster Pod Utilization + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) + * 100 + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 60 + target: '' + thresholds: '80,90' + title: Pod Utilization + type: singlestat 
+ valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Kubernetes Capacity Planning + version: 4 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true + kubernetes_cluster_status: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: + graphTooltip: 0 + hideControls: false + id: + links: [] + rows: + - collapse: false + height: 129 + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: 
+ - from: 'null' + text: N/A + to: 'null' + span: 6 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Control Plane UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: UP + value: 'null' + valueName: total + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 6 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 6 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '3,5' + title: Alerts Firing + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Cluster Health + titleSize: h6 + - collapse: false + height: 168 + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + decimals: + format: percent + gauge: + maxValue: 100 + 
minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 1 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '50,80' + title: API Servers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + decimals: + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 2 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) + * 100 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '50,80' + title: Controller Managers UP + type: singlestat + valueFontSize: 80% + 
valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: "${DS_PROMETHEUS}" + decimals: + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 3 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"})) + * 100 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '50,80' + title: Schedulers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + hideTimeOverride: false + id: 4 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 
193) + show: false + tableColumn: '' + targets: + - expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h]) + > 5) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Crashlooping Control Plane Pods + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Control Plane Status + titleSize: h6 + - collapse: false + height: 158 + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 8 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) + * 100)) / count(node_cpu{job="node-exporter",mode="idle"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: CPU Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: percent + gauge: + maxValue: 100 + 
minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) + - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: Memory Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 9 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) + / sum(node_filesystem_size{device!="rootfs"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: 
Filesystem Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) + * 100 + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: Pod Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Capacity Planing + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + time: + from: now-6h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: '' + title: Kubernetes Cluster Status + version: 3 + inputs: + - name: prometheus + pluginId: prometheus + type: 
datasource + value: prometheus + overwrite: true diff --git a/grafana/values_overrides/nginx.yaml b/grafana/values_overrides/nginx.yaml new file mode 100644 index 000000000..7c36c9584 --- /dev/null +++ b/grafana/values_overrides/nginx.yaml @@ -0,0 +1,619 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# nginx +conf: + dashboards: + nginx_stats: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.5.2 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + annotations: + list: [] + description: Show stats from the hnlq715/nginx-vts-exporter. + editable: true + gnetId: 2949 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: 5m + rows: + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 7 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum(nginx_upstream_responses_total{upstream=~"^$Upstream$"}) by (status_code, + upstream) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ status_code }}.{{ upstream }}" + metric: nginx_upstream_response + refId: A + step: 4 + thresholds: [] + timeFrom: + timeShift: + title: HTTP Response Codes by Upstream + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + 
max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 6 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(irate(nginx_upstream_requests_total{upstream=~"^$Upstream$"}[5m])) + by (upstream) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ upstream }}" + metric: nginx_upstream_requests + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Upstream Requests rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 5 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(irate(nginx_upstream_bytes_total{upstream=~"^$Upstream$"}[5m])) by + (direction, upstream) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ direction }}.{{ upstream }}" + 
metric: nginx_upstream_bytes + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Upstream Bytes Transfer rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 1 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(irate(nginx_connections_total[5m])) by (type) + format: time_series + intervalFactor: 2 + legendFormat: "{{ type }}" + metric: nginx_server_connections + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Overall Connections rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 4 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: 
[] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(irate(nginx_cache_total{ server_zone=~"$ingress"}[5m])) by (server_zone, + type) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ type }}.{{ server_zone }}" + metric: nginx_server_cache + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Cache Action rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(irate(nginx_requests_total{ server_zone=~"$ingress" }[5m])) by (server_zone) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: "{{ server_zone }}" + metric: nginx_server_requests + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Overall Requests rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false 
+ datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 2 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(irate(nginx_bytes_total{ server_zone=~"$ingress" }[5m])) by (direction, + server_zone) + format: time_series + intervalFactor: 2 + legendFormat: "{{ direction }}.{{ server_zone }}" + metric: nginx_server_bytes + refId: A + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Overall Bytes Transferred rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: + - prometheus + - nginx + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: ".*" + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: + multi: true + name: Upstream + options: [] + query: label_values(nginx_upstream_bytes_total, upstream) + refresh: 1 + regex: '' + sort: 1 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: + multi: true + name: ingress + options: [] + query: label_values(nginx_bytes_total, server_zone) + refresh: 1 + regex: "/^[^\\*_]+$/" + sort: 1 + 
tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Nginx Stats + version: 13 diff --git a/grafana/values_overrides/nodes.yaml b/grafana/values_overrides/nodes.yaml new file mode 100644 index 000000000..0c28bd890 --- /dev/null +++ b/grafana/values_overrides/nodes.yaml @@ -0,0 +1,755 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# the status of all nodes in a deployment +conf: + dashboards: + nodes: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + description: Dashboard to get an overview of one server + editable: true + gnetId: 22 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: false + rows: + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m])) + * 100) + hide: 
false + intervalFactor: 10 + legendFormat: "{{cpu}}" + refId: A + step: 50 + thresholds: [] + timeFrom: + timeShift: + title: Idle cpu + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percent + label: cpu usage + logBase: 1 + max: 100 + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 9 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: node_load1{instance="$server"} + intervalFactor: 4 + legendFormat: load 1m + refId: A + step: 20 + target: '' + - expr: node_load5{instance="$server"} + intervalFactor: 4 + legendFormat: load 5m + refId: B + step: 20 + target: '' + - expr: node_load15{instance="$server"} + intervalFactor: 4 + legendFormat: load 15m + refId: C + step: 20 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: System load + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percentunit + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + 
error: false + fill: 1 + grid: {} + id: 4 + legend: + alignAsTable: false + avg: false + current: false + hideEmpty: false + hideZero: false + max: false + min: false + rightSide: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} + yaxis: 2 + spaceLength: 10 + span: 9 + stack: true + steppedLine: false + targets: + - expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} + - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"} + hide: false + interval: '' + intervalFactor: 2 + legendFormat: memory used + metric: '' + refId: C + step: 10 + - expr: node_memory_Buffers{instance="$server"} + interval: '' + intervalFactor: 2 + legendFormat: memory buffers + metric: '' + refId: E + step: 10 + - expr: node_memory_Cached{instance="$server"} + intervalFactor: 2 + legendFormat: memory cached + metric: '' + refId: F + step: 10 + - expr: node_memory_MemFree{instance="$server"} + intervalFactor: 2 + legendFormat: memory free + metric: '' + refId: D + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Memory usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + 
links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - + node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}) + / node_memory_MemTotal{instance="$server"}) * 100 + intervalFactor: 2 + refId: A + step: 60 + target: '' + thresholds: 80, 90 + title: Memory usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 6 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: read + yaxis: 1 + - alias: '{instance="172.17.0.1:9100"}' + yaxis: 2 + - alias: io time + yaxis: 2 + spaceLength: 10 + span: 9 + stack: false + steppedLine: false + targets: + - expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m])) + hide: false + intervalFactor: 4 + legendFormat: read + refId: A + step: 20 + target: '' + - expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m])) + intervalFactor: 4 + legendFormat: written + refId: B + step: 20 + - expr: sum by 
(instance) (rate(node_disk_io_time_ms{instance="$server"}[2m])) + intervalFactor: 4 + legendFormat: io time + refId: C + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Disk I/O + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: ms + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: percentunit + gauge: + maxValue: 1 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"})) + / sum(node_filesystem_size{device!="rootfs",instance="$server"}) + intervalFactor: 2 + refId: A + step: 60 + target: '' + thresholds: 0.75, 0.9 + title: Disk space usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + 
editable: true + error: false + fill: 1 + grid: {} + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m]) + hide: false + intervalFactor: 2 + legendFormat: "{{device}}" + refId: A + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network received + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 10 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m]) + hide: false + intervalFactor: 2 + legendFormat: "{{device}}" + refId: B + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network transmitted + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + 
logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: Server + multi: false + name: host + options: [] + query: label_values(node_uname_info, nodename) + refresh: 1 + regex: '' + sort: 0 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 2 + includeAll: false + label: Instance + multi: false + name: server + options: [] + query: label_values(node_uname_info{nodename="$host"}, instance) + refresh: 1 + regex: '' + sort: 0 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Nodes + version: 2 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true diff --git a/grafana/values_overrides/openstack.yaml b/grafana/values_overrides/openstack.yaml new file mode 100644 index 000000000..d143a7967 --- /dev/null +++ b/grafana/values_overrides/openstack.yaml @@ -0,0 +1,3013 @@ +# NOTE(srwilkers): This overrides file provides a reference for dashboards for +# the openstack control plane as a whole, the individual openstack services, and +# rabbitmq +conf: + dashboards: + rabbitmq: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: '' + 
type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.2.0 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: 2121 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: 5m + rows: + - collapse: false + height: 266 + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: "${DS_PROMETHEUS}" + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 13 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + metric: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + refId: A + step: 2 + thresholds: Up,Down + timeFrom: 30s + title: RabbitMQ Server + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + - op: "=" + text: Down + value: '0' + - op: "=" + text: Up + value: '1' + valueName: current + - alert: + conditions: + - evaluator: + params: + - 1 + type: lt + operator: + type: and + query: + params: + - A + - 10s + - now + reducer: + params: [] + type: last + type: query + - evaluator: + params: [] + type: no_value + operator: + 
type: and + query: + params: + - A + - 10s + - now + reducer: + params: [] + type: last + type: query + executionErrorState: alerting + frequency: 60s + handler: 1 + message: Some of the RabbitMQ node is down + name: Node Stats alert + noDataState: no_data + notifications: [] + aliasColors: {} + bars: true + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 12 + legend: + alignAsTable: true + avg: false + current: true + max: false + min: false + show: true + total: false + values: true + lines: false + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 9 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_running{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}" + metric: rabbitmq_running + refId: A + step: 2 + thresholds: + - colorMode: critical + fill: true + line: true + op: lt + value: 1 + timeFrom: 30s + timeShift: + title: Node up Stats + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 6 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_exchangesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{instance}}:exchanges" + metric: rabbitmq_exchangesTotal + refId: A + step: 2 + thresholds: [] + 
timeFrom: + timeShift: + title: Exchanges + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 4 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_channelsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{instance}}:channels" + metric: rabbitmq_channelsTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Channels + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 3 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_consumersTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{instance}}:consumers" + metric: rabbitmq_consumersTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + 
timeShift: + title: Consumers + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 5 + legend: + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_connectionsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{instance}}:connections" + metric: rabbitmq_connectionsTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Connections + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 7 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_queuesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{instance}}:queues" + metric: rabbitmq_queuesTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Queues + tooltip: + shared: 
true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 8 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum by (vhost)(rabbitmq_queue_messages_ready{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) + intervalFactor: 2 + legendFormat: "{{vhost}}:ready" + metric: rabbitmq_queue_messages_ready + refId: A + step: 2 + - expr: sum by (vhost)(rabbitmq_queue_messages_published_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) + intervalFactor: 2 + legendFormat: "{{vhost}}:published" + metric: rabbitmq_queue_messages_published_total + refId: B + step: 2 + - expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) + intervalFactor: 2 + legendFormat: "{{vhost}}:delivered" + metric: rabbitmq_queue_messages_delivered_total + refId: C + step: 2 + - expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) + intervalFactor: 2 + legendFormat: "{{vhost}}:unack" + metric: ack + refId: D + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Messages/host + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + 
min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + decimals: 0 + fill: 1 + id: 2 + legend: + alignAsTable: true + avg: false + current: true + max: false + min: false + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_queue_messages{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{queue}}:{{durable}}" + metric: rabbitmq_queue_messages + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Messages / Queue + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 9 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_node_mem_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}:used" + metric: rabbitmq_node_mem_used + refId: A + step: 2 + - expr: rabbitmq_node_mem_limit{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}:limit" + metric: node_mem + refId: B + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Memory + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + 
xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: decbytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 10 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_fd_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}:used" + metric: '' + refId: A + step: 2 + - expr: rabbitmq_fd_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}:total" + metric: node_mem + refId: B + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: FIle descriptors + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 11 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_sockets_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}:used" + metric: '' + refId: A + step: 2 + - expr: 
rabbitmq_sockets_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} + intervalFactor: 2 + legendFormat: "{{node}}:total" + metric: '' + refId: B + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Sockets + tooltip: + shared: true + sort: 0 + value_type: individual + transparent: false + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - current: {} + hide: 0 + label: null + name: rabbit + options: [] + type: query + query: label_values(rabbitmq_up, release_group) + refresh: 1 + sort: 1 + datasource: "${DS_PROMETHEUS}" + time: + from: now-5m + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: RabbitMQ Metrics + version: 17 + description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections, + Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.' 
+ openstack_control_plane: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.5.2 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: text + name: Text + version: '' + annotations: + list: [] + editable: true + gnetId: + graphTooltip: 1 + hideControls: false + id: + links: [] + refresh: 5m + rows: + - collapse: false + height: 250px + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 24 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=keystone + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_keystone_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A 
+ resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Keystone + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 23 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=glance + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_glance_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Glance + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false 
+ colors: + - rgba(202, 58, 40, 0.86) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 22 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=heat + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_heat_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Heat + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 21 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown 
dashboard + params: var-Service=neutron + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_neutron_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Neutron + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(208, 53, 34, 0.82) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 20 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=nova + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 
'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_nova_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Nova + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 19 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=swift + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_swift_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - 
params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Ceph + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 18 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=cinder + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_cinder_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Cinder + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: 
"=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 17 + interval: "> 60s" + links: + - dashboard: Openstack Service + name: Drilldown dashboard + params: var-Service=placement + title: Openstack Service + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_placement_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Placement + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(208, 53, 34, 0.82) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 
0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 16 + interval: "> 60s" + links: + - dashboard: RabbitMQ Metrics + name: Drilldown dashboard + title: RabbitMQ Metrics + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: min(rabbitmq_up) + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: RabbitMQ + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(208, 53, 34, 0.82) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 15 + interval: "> 60s" + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + 
full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: min(mysql_global_status_wsrep_ready) + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: MariaDB + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(225, 177, 40, 0.59) + - rgba(208, 53, 34, 0.82) + - rgba(118, 245, 40, 0.73) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 14 + interval: "> 60s" + links: + - dashboard: Nginx Stats + name: Drilldown dashboard + title: Nginx Stats + type: dashboard + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: '1' + text: OK + to: '99999999999999' + - from: '0' + text: CRIT + to: '0' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: sum_over_time(nginx_connections_total{type="active", namespace="openstack"}[5m]) + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' 
+ intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '0,1' + title: Nginx + type: singlestat + valueFontSize: 50% + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(208, 53, 34, 0.82) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 13 + interval: "> 60s" + links: + - dashboard: Memcached + name: Drilldown dashboard + title: Memcached + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: min(memcached_up) + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Memcached + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: OpenStack Services + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + 
editable: true + error: false + fill: 1 + grid: {} + id: 11 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 3 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - alias: free + column: value + expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} + openstack_total_free_vcpus{job="openstack-metrics", + region="$region"} + format: time_series + function: min + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + - alias: used + column: value + expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} + format: time_series + function: max + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: VCPUs (total vs used) + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 12 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 3 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + 
renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - alias: free + column: value + expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} + openstack_total_free_ram_MB{job="openstack-metrics", + region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + - alias: used + column: value + expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: RAM (total vs used) + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: mbytes + label: '' + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 13 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 3 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - alias: free + column: value + expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} + openstack_total_free_disk_GB{job="openstack-metrics", + 
region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + - alias: used + column: value + expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Disk (used vs total) + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: gbytes + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 27 + interval: "> 60s" + legend: + alignAsTable: false + avg: true + current: true + hideEmpty: true + hideZero: false + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 4 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + stack: false + steppedLine: false + targets: + - alias: free + column: value + expr: sum(openstack_running_instances) + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + interval: "15s" + intervalFactor: 1 + legendFormat: "{{ running_vms }}" + policy: default + rawQuery: false + refId: A + resultFormat: time_series + - alias: used + column: value + expr:
sum(openstack_total_running_instances) + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + interval: "15s" + intervalFactor: 1 + legendFormat: "{{ total_vms }}" + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: OpenStack Instances + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: true + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: none + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Virtual resources + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + enable: true + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: + multi: false + name: region + options: [] + query: label_values(openstack_exporter_cache_refresh_duration_seconds, region) + refresh: 1 + regex: '' + sort: 0 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + time: + from: now-1h + to: now + timepicker: + collapse: false + enable: true + notice: false + now: true + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + status: Stable + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + type: timepicker + timezone: browser + title: OpenStack Metrics + version: 2 + openstack-service: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus
+ __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.5.2 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + enable: true + list: [] + editable: true + gnetId: + graphTooltip: 1 + hideControls: false + id: + links: [] + refresh: 5m + rows: + - collapse: false + height: 250px + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(225, 177, 40, 0.59) + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 6 + interval: "> 60s" + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + expr: openstack_check_[[Service]]_api{job="openstack-metrics"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '0,1' + title: '' + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: CRITICAL + value: '0' + - op: "=" + text: OK + value: '1' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) 
+ - rgba(225, 177, 40, 0.59) + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 13 + interval: "> 60s" + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - column: value + condition: '' + expr: sum(nginx_responses_total{server_zone=~"[[Service]].*", status_code="5xx"}) + fill: '' + format: time_series + function: count + groupBy: + - interval: auto + params: + - auto + type: time + - params: + - '0' + type: fill + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + tags: [] + thresholds: '' + title: HTTP 5xx errors + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 0 + grid: {} + id: 7 + interval: ">60s" + legend: + alignAsTable: true + avg: true + current: false + max: true + min: true + show: true + sortDesc: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 8 + stack: false + steppedLine: false + targets: + - expr: sum(nginx_upstream_response_msecs_avg{upstream=~"openstack-[[Service]].*"}) + by (upstream) + format: time_series + intervalFactor: 2 + refId: A + step: 120 + 
thresholds: [] + timeFrom: + timeShift: + title: HTTP response time + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + grid: {} + id: 9 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: true + targets: + - alias: healthy + column: value + expr: openstack_check_[[Service]]_api + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + select: [] + step: 120 + tags: [] + thresholds: [] + timeFrom: + timeShift: + title: API Availability + tooltip: + msResolution: false + shared: false + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: none + label: '' + logBase: 1 + max: 1 + min: 0 + show: false + - format: short + logBase: 1 + max: + min: + show: false + - aliasColors: + '{status_code="2xx"}': "#629E51" + '{status_code="5xx"}': "#BF1B00" + bars: true + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 0 + grid: {} + id: 8 + interval: "> 60s" + legend: + alignAsTable: false + avg: false + current: false + hideEmpty: false + max: false + min: false + rightSide: false + show: true + 
total: false + values: false + lines: false + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 8 + stack: true + steppedLine: false + targets: + - expr: sum(nginx_responses_total{server_zone=~"[[Service]].*"}) by (status_code) + format: time_series + intervalFactor: 2 + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Number of HTTP responses + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Service Status + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + enable: true + list: + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - allValue: + current: + tags: [] + text: cinder + value: cinder + hide: 0 + includeAll: false + label: + multi: false + name: Service + options: + - selected: false + text: nova + value: nova + - selected: false + text: glance + value: glance + - selected: false + text: keystone + value: keystone + - selected: true + text: cinder + value: cinder + - selected: false + text: heat + value: heat + - selected: false + text: placement + value: placement + - selected: false + text: neutron + value: neutron + query: nova,glance,keystone,cinder,heat,placement,neutron + type: custom + time: + from: now-1h + to: now + timepicker: + collapse: false + enable: true + notice: false + now: true + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + status: Stable + time_options: + - 5m + - 15m + - 1h + 
- 6h + - 12h + - 24h + - 2d + - 7d + - 30d + type: timepicker + timezone: browser + title: Openstack Service + version: 4 diff --git a/grafana/values_overrides/prometheus.yaml b/grafana/values_overrides/prometheus.yaml new file mode 100644 index 000000000..d1aa99eac --- /dev/null +++ b/grafana/values_overrides/prometheus.yaml @@ -0,0 +1,2795 @@ +# NOTE(srwilkers): This overrides file provides a reference for a dashboard for +# Prometheus +conf: + dashboards: + prometheus: + __inputs: + - name: DS_PROMETHEUS + label: Prometheus + description: Prometheus which you want to monitor + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.6.0 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: text + name: Text + version: '' + annotations: + list: + - builtIn: 1 + datasource: "-- Grafana --" + enable: true + hide: true + iconColor: rgba(0, 211, 255, 1) + name: Annotations & Alerts + type: dashboard + - datasource: "${DS_PROMETHEUS}" + enable: true + expr: count(sum(up{instance="$instance"}) by (instance) < 1) + hide: false + iconColor: rgb(250, 44, 18) + limit: 100 + name: downage + showIn: 0 + step: 30s + tagKeys: instance + textFormat: prometheus down + titleFormat: Downage + type: alert + - datasource: "${DS_PROMETHEUS}" + enable: true + expr: sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) + by (instance) + hide: false + iconColor: "#fceaca" + limit: 100 + name: Reload + showIn: 0 + step: 5m + tagKeys: instance + tags: [] + titleFormat: Reload + type: tags + description: Dashboard for monitoring of Prometheus v2.x.x + editable: true + gnetId: 3681 + graphTooltip: 1 + hideControls: false + id: + links: + - icon: info + tags: [] + targetBlank: true + title: 'Dashboard''s Github ' + tooltip: 
Github repo of this dashboard + type: link + url: https://github.com/FUSAKLA/Prometheus2-grafana-dashboard + - icon: doc + tags: [] + targetBlank: true + title: Prometheus Docs + tooltip: '' + type: link + url: http://prometheus.io/docs/introduction/overview/ + refresh: 5m + rows: + - collapse: false + height: 161 + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - "#299c46" + - rgba(237, 129, 40, 0.89) + - "#bf1b00" + datasource: "${DS_PROMETHEUS}" + decimals: 1 + format: s + gauge: + maxValue: 1000000 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 41 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: time() - process_start_time_seconds{instance="$instance"} + format: time_series + instant: false + intervalFactor: 2 + refId: A + thresholds: '' + title: Uptime + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - "#299c46" + - rgba(237, 129, 40, 0.89) + - "#bf1b00" + datasource: "${DS_PROMETHEUS}" + format: short + gauge: + maxValue: 1000000 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 42 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + 
sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: true + tableColumn: '' + targets: + - expr: prometheus_tsdb_head_series{instance="$instance"} + format: time_series + instant: false + intervalFactor: 2 + refId: A + thresholds: '500000,800000,1000000' + title: Total count of time series + type: singlestat + valueFontSize: 150% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - "#299c46" + - rgba(237, 129, 40, 0.89) + - "#d44a3a" + datasource: "${DS_PROMETHEUS}" + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 48 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: version + targets: + - expr: prometheus_build_info{instance="$instance"} + format: table + instant: true + intervalFactor: 2 + refId: A + thresholds: '' + title: Version + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - "#299c46" + - rgba(237, 129, 40, 0.89) + - "#d44a3a" + datasource: "${DS_PROMETHEUS}" + decimals: 2 + format: ms + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 49 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% 
+ prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: prometheus_tsdb_head_max_time{instance="$instance"} - prometheus_tsdb_head_min_time{instance="$instance"} + format: time_series + instant: true + intervalFactor: 2 + refId: A + thresholds: '' + title: Actual head block length + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - content: + height: '' + id: 50 + links: [] + mode: html + span: 1 + title: '' + transparent: true + type: text + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - "#e6522c" + - rgba(237, 129, 40, 0.89) + - "#299c46" + datasource: "${DS_PROMETHEUS}" + decimals: 1 + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 52 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: '2' + format: time_series + intervalFactor: 2 + refId: A + thresholds: '10,20' + title: '' + transparent: true + type: singlestat + valueFontSize: 200% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Header instance info + titleSize: h6 + - collapse: false + height: '250' + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 
+ id: 15 + legend: + avg: true + current: false + max: false + min: false + show: false + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: true + steppedLine: false + targets: + - expr: max(prometheus_engine_query_duration_seconds{instance="$instance"}) by + (instance, slice) + format: time_series + intervalFactor: 1 + legendFormat: max duration for {{slice}} + metric: prometheus_local_storage_rushed_mode + refId: A + step: 900 + thresholds: [] + timeFrom: + timeShift: + title: Query elapsed time + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: '' + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 17 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(increase(prometheus_tsdb_head_series_created_total{instance="$instance"}[$aggregation_interval])) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: created on {{ instance }} + metric: prometheus_local_storage_maintain_series_duration_seconds_count + refId: A + step: 1800 + - expr: 
sum(increase(prometheus_tsdb_head_series_removed_total{instance="$instance"}[$aggregation_interval])) + by (instance) * -1 + format: time_series + intervalFactor: 2 + legendFormat: removed on {{ instance }} + refId: B + thresholds: [] + timeFrom: + timeShift: + title: Head series created/deleted + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 13 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: exceeded_sample_limit on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: A + step: 1800 + - expr: sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: duplicate_timestamp on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: B + step: 1800 + - expr: sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance="$instance"}[$aggregation_interval])) + by 
(instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: out_of_bounds on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: C + step: 1800 + - expr: sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: out_of_order on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: D + step: 1800 + - expr: sum(increase(prometheus_rule_evaluation_failures_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: rule_evaluation_failure on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: G + step: 1800 + - expr: sum(increase(prometheus_tsdb_compactions_failed_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: tsdb_compactions_failed on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: K + step: 1800 + - expr: sum(increase(prometheus_tsdb_reloads_failures_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: tsdb_reloads_failures on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: L + step: 1800 + - expr: sum(increase(prometheus_tsdb_head_series_not_found{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: head_series_not_found on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: N + step: 1800 + - expr: sum(increase(prometheus_evaluator_iterations_missed_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: 
evaluator_iterations_missed on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: O + step: 1800 + - expr: sum(increase(prometheus_evaluator_iterations_skipped_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: evaluator_iterations_skipped on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: P + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Prometheus errors + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Main info + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + description: '' + editable: true + error: false + fill: 1 + grid: {} + id: 25 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: false + show: false + sort: max + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: prometheus_target_interval_length_seconds{instance="$instance",quantile="0.99"} + - 60 + format: time_series + interval: 2m + intervalFactor: 1 + legendFormat: "{{instance}}" + metric: '' + refId: A + step: 300 + thresholds: [] + timeFrom: + timeShift: + title: Scrape delay (counts with 1m scrape interval) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + 
name: + show: true + values: [] + yaxes: + - format: s + logBase: 1 + max: + min: + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 14 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: Queue length + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_evaluator_duration_seconds{instance="$instance"}) by (instance, + quantile) + format: time_series + intervalFactor: 2 + legendFormat: Queue length + metric: prometheus_local_storage_indexing_queue_length + refId: B + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Rule evaulation duration + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Scrape & rule duration + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 18 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' 
+ percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(increase(http_requests_total{instance="$instance"}[$aggregation_interval])) + by (instance, handler) > 0 + format: time_series + intervalFactor: 2 + legendFormat: "{{ handler }} on {{ instance }}" + metric: '' + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Request count + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: none + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 16 + legend: + avg: false + current: false + hideEmpty: true + hideZero: true + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: max(sum(http_request_duration_microseconds{instance="$instance"}) by (instance, + handler, quantile)) by (instance, handler) > 0 + format: time_series + hide: false + intervalFactor: 2 + legendFormat: "{{ handler }} on {{ instance }}" + refId: B + thresholds: [] + timeFrom: + timeShift: + title: Request duration per handler + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: µs + label: + logBase: 1 + max: + min: '0' + show: true + - 
format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 19 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(increase(http_request_size_bytes{instance="$instance", quantile="0.99"}[$aggregation_interval])) + by (instance, handler) > 0 + format: time_series + hide: false + intervalFactor: 2 + legendFormat: "{{ handler }} in {{ instance }}" + refId: B + thresholds: [] + timeFrom: + timeShift: + title: Request size by handler + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Allocated bytes: "#F9BA8F" + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max count collector: "#bf1b00" + Max count harvester: "#bf1b00" + Max to persist: "#3F6833" + RSS: "#890F02" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/Max.*/" + fill: 0 + linewidth: 2 + spaceLength: 10 + span: 
3 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_engine_queries{instance="$instance"}) by (instance, handler) + format: time_series + intervalFactor: 2 + legendFormat: 'Current count ' + metric: last + refId: A + step: 1800 + - expr: sum(prometheus_engine_queries_concurrent_max{instance="$instance"}) by + (instance, handler) + format: time_series + intervalFactor: 2 + legendFormat: Max count + metric: last + refId: B + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Cont of concurent queries + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Requests & queries + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: + Alert queue capacity on o collector: "#bf1b00" + Alert queue capacity on o harvester: "#bf1b00" + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 20 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/.*capacity.*/" + fill: 0 + linewidth: 2 + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_notifications_queue_capacity{instance="$instance"})by (instance) + format: time_series + intervalFactor: 2 + legendFormat: 'Alert queue capacity ' + metric: prometheus_local_storage_checkpoint_last_size_bytes + refId: A + step: 1800 + 
- expr: sum(prometheus_notifications_queue_length{instance="$instance"})by (instance) + format: time_series + intervalFactor: 2 + legendFormat: 'Alert queue size on ' + metric: prometheus_local_storage_checkpoint_last_size_bytes + refId: B + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Alert queue size + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 21 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_notifications_alertmanagers_discovered{instance="$instance"}) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: Checkpoint chunks written/s + metric: prometheus_local_storage_checkpoint_series_chunks_written_sum + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Count of discovered alertmanagers + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: none + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + 
Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 39 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(increase(prometheus_notifications_dropped_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: notifications_dropped on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: F + step: 1800 + - expr: sum(increase(prometheus_rule_evaluation_failures_total{rule_type="alerting",instance="$instance"}[$aggregation_interval])) + by (rule_type,instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: rule_evaluation_failures on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Alerting errors + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Alerting + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 45 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: 
false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: increase(prometheus_target_sync_length_seconds_count{scrape_job="kubernetes-service-endpoints"}[$aggregation_interval]) + format: time_series + intervalFactor: 2 + legendFormat: Count of target synces + refId: A + step: 240 + thresholds: [] + timeFrom: + timeShift: + title: Kubernetes SD sync count + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 46 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: exceeded_sample_limit on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: A + step: 1800 + - expr: sum(increase(prometheus_sd_file_read_errors_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: sd_file_read_error on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: E + step: 1800 + thresholds: 
[] + timeFrom: + timeShift: + title: Service discovery errors + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Service discovery + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 36 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(increase(prometheus_tsdb_reloads_total{instance="$instance"}[30m])) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: "{{ instance }}" + refId: A + thresholds: [] + timeFrom: + timeShift: + title: Reloaded block from disk + tooltip: + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 5 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + 
points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_tsdb_blocks_loaded{instance="$instance"}) by (instance) + format: time_series + intervalFactor: 2 + legendFormat: Loaded data blocks + metric: prometheus_local_storage_memory_chunkdescs + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Loaded data blocks + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: prometheus_tsdb_head_series{instance="$instance"} + format: time_series + intervalFactor: 2 + legendFormat: Time series count + metric: prometheus_local_storage_memory_series + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Time series total count + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false 
+ datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 1 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(rate(prometheus_tsdb_head_samples_appended_total{instance="$instance"}[$aggregation_interval])) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: samples/s {{instance}} + metric: prometheus_local_storage_ingested_samples_total + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Samples Appended per second + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: '' + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: TSDB stats + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + To persist: "#9AC48A" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 2 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/Max.*/" + fill: 0 + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_tsdb_head_chunks{instance="$instance"}) by (instance) + format: time_series + 
hide: false + intervalFactor: 2 + legendFormat: Head chunk count + metric: prometheus_local_storage_memory_chunks + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Head chunks count + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 35 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: max(prometheus_tsdb_head_max_time{instance="$instance"}) by (instance) + - min(prometheus_tsdb_head_min_time{instance="$instance"}) by (instance) + format: time_series + intervalFactor: 2 + legendFormat: "{{ instance }}" + refId: A + thresholds: [] + timeFrom: + timeShift: + title: Length of head block + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: ms + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 4 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 
'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(rate(prometheus_tsdb_head_chunks_created_total{instance="$instance"}[$aggregation_interval])) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: created on {{ instance }} + refId: B + - expr: sum(rate(prometheus_tsdb_head_chunks_removed_total{instance="$instance"}[$aggregation_interval])) + by (instance) * -1 + format: time_series + intervalFactor: 2 + legendFormat: deleted on {{ instance }} + refId: C + thresholds: [] + timeFrom: + timeShift: + title: Head Chunks Created/Deleted per second + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Head block stats + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 33 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(increase(prometheus_tsdb_compaction_duration_sum{instance="$instance"}[30m]) + / increase(prometheus_tsdb_compaction_duration_count{instance="$instance"}[30m])) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: "{{ instance }}" + refId: B + thresholds: [] + timeFrom: + timeShift: + title: Compaction duration + tooltip: + shared: 
true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 34 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_tsdb_head_gc_duration_seconds{instance="$instance"}) by + (instance, quantile) + format: time_series + intervalFactor: 2 + legendFormat: "{{ quantile }} on {{ instance }}" + refId: A + thresholds: [] + timeFrom: + timeShift: + title: Go Garbage collection duration + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 37 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(prometheus_tsdb_wal_truncate_duration_seconds{instance="$instance"}) + by (instance, quantile) + format: time_series + intervalFactor: 2 + legendFormat: "{{ quantile }} on {{ instance }}" + refId: A + thresholds: [] + 
timeFrom: + timeShift: + title: WAL truncate duration seconds + tooltip: + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + fill: 1 + id: 38 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(tsdb_wal_fsync_duration_seconds{instance="$instance"}) by (instance, + quantile) + format: time_series + intervalFactor: 2 + legendFormat: "{{ quantile }} {{ instance }}" + refId: A + thresholds: [] + timeFrom: + timeShift: + title: WAL fsync duration seconds + tooltip: + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Data maintenance + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: + Allocated bytes: "#7EB26D" + Allocated bytes - 1m max: "#BF1B00" + Allocated bytes - 1m min: "#BF1B00" + Allocated bytes - 5m max: "#BF1B00" + Allocated bytes - 5m min: "#BF1B00" + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + RSS: "#447EBC" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + decimals: + editable: true + error: false + fill: 1 + id: 6 + legend: + avg: false + 
current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/-/" + fill: 0 + - alias: collector heap size + color: "#E0752D" + fill: 0 + linewidth: 2 + - alias: collector kubernetes memory limit + color: "#BF1B00" + fill: 0 + linewidth: 3 + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(process_resident_memory_bytes{instance="$instance"}) by (instance) + format: time_series + hide: false + intervalFactor: 2 + legendFormat: Total resident memory - {{instance}} + metric: process_resident_memory_bytes + refId: B + step: 1800 + - expr: sum(go_memstats_alloc_bytes{instance="$instance"}) by (instance) + format: time_series + hide: false + intervalFactor: 2 + legendFormat: Total llocated bytes - {{instance}} + metric: go_memstats_alloc_bytes + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Memory + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Allocated bytes: "#F9BA8F" + Chunks: "#1F78C1" + Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + RSS: "#890F02" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 7 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + 
targets: + - expr: rate(go_memstats_alloc_bytes_total{instance="$instance"}[$aggregation_interval]) + format: time_series + intervalFactor: 2 + legendFormat: Allocated Bytes/s + metric: go_memstats_alloc_bytes + refId: A + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Allocations per second + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + decimals: 2 + editable: true + error: false + fill: 1 + id: 9 + legend: + alignAsTable: false + avg: false + current: false + hideEmpty: false + max: false + min: false + rightSide: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: sum(rate(process_cpu_seconds_total{instance="$instance"}[$aggregation_interval])) + by (instance) + format: time_series + intervalFactor: 2 + legendFormat: CPU/s + metric: prometheus_local_storage_ingested_samples_total + refId: B + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: CPU per second + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: + - avg + yaxes: + - format: none + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: RAM&CPU + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: + Chunks: "#1F78C1" + 
Chunks to persist: "#508642" + Max chunks: "#052B51" + Max to persist: "#3F6833" + bars: false + dashLength: 10 + dashes: false + datasource: "${DS_PROMETHEUS}" + editable: true + error: false + fill: 1 + id: 47 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum(increase(net_conntrack_dialer_conn_failed_total{instance="$instance"}[$aggregation_interval])) + by (instance) > 0 + format: time_series + hide: false + interval: '' + intervalFactor: 2 + legendFormat: conntrack_dialer_conn_failed on {{ instance }} + metric: prometheus_local_storage_chunk_ops_total + refId: M + step: 1800 + thresholds: [] + timeFrom: + timeShift: + title: Net errors + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Contrac errors + titleSize: h6 + schemaVersion: 14 + style: dark + tags: + - prometheus + templating: + list: + - auto: true + auto_count: 30 + auto_min: 2m + current: + text: auto + value: "$__auto_interval" + hide: 0 + label: aggregation intarval + name: aggregation_interval + options: + - selected: true + text: auto + value: "$__auto_interval" + - selected: false + text: 1m + value: 1m + - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - 
selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 2 + type: interval + - allValue: + current: {} + datasource: "${DS_PROMETHEUS}" + hide: 0 + includeAll: false + label: Instance + multi: false + name: instance + options: [] + query: label_values(prometheus_build_info, instance) + refresh: 2 + regex: '' + sort: 2 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + - current: + text: Prometheus + value: Prometheus + hide: 0 + label: Prometheus datasource + name: DS_PROMETHEUS + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + - current: + text: influxdb(heapster) - kokura + value: influxdb(heapster) - kokura + hide: 0 + label: InfluxDB datasource + name: influx_datasource + options: [] + query: influxdb + refresh: 1 + regex: '' + type: datasource + time: + from: now-7d + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Prometheus2.0 (v1.0.0 by FUSAKLA) + version: 8 diff --git a/roles/osh-run-script/defaults/main.yaml b/roles/osh-run-script/defaults/main.yaml index f84fb778a..fc1d61755 100644 --- a/roles/osh-run-script/defaults/main.yaml +++ b/roles/osh-run-script/defaults/main.yaml @@ -11,7 +11,6 @@ # limitations under the License. 
osh_params: - openstack_release: newton container_distro_name: ubuntu container_distro_version: xenial #feature_gates: diff --git a/roles/osh-run-script/tasks/main.yaml b/roles/osh-run-script/tasks/main.yaml index a64ed1737..535020c61 100644 --- a/roles/osh-run-script/tasks/main.yaml +++ b/roles/osh-run-script/tasks/main.yaml @@ -21,7 +21,6 @@ OSH_EXTRA_HELM_ARGS: "{{ zuul_osh_extra_helm_args_relative_path | default('') }}" OSH_PATH: "{{ zuul_osh_relative_path | default('../openstack-helm/') }}" OSH_INFRA_PATH: "{{ zuul_osh_infra_relative_path | default('../openstack-helm-infra/') }}" - OPENSTACK_RELEASE: "{{ osh_params.openstack_release }}" - CONTAINER_DISTRO_NAME: "{{ osh_params.container_distro_name }}" - CONTAINER_DISTRO_VERSION: "{{ osh_params.container_distro_version }}" + CONTAINER_DISTRO_NAME: "{{ osh_params.container_distro_name | default('') }}" + CONTAINER_DISTRO_VERSION: "{{ osh_params.container_distro_version | default('') }}" FEATURE_GATES: "{{ osh_params.feature_gates | default('') }}" diff --git a/tools/deployment/common/000-install-packages.sh b/tools/deployment/common/000-install-packages.sh index 4b3129b07..d84055510 100755 --- a/tools/deployment/common/000-install-packages.sh +++ b/tools/deployment/common/000-install-packages.sh @@ -22,4 +22,5 @@ sudo apt-get install --no-install-recommends -y \ git \ make \ nmap \ - curl + curl \ + bc diff --git a/tools/deployment/multinode/100-grafana.sh b/tools/deployment/multinode/100-grafana.sh index 1aff7ab1a..44e9697f2 100755 --- a/tools/deployment/multinode/100-grafana.sh +++ b/tools/deployment/multinode/100-grafana.sh @@ -19,10 +19,15 @@ set -xe #NOTE: Lint and package chart make grafana +FEATURE_GATES="calico,ceph,containers,coredns,elasticsearch,kubernetes,nginx,nodes,openstack,prometheus" +: ${OSH_INFRA_EXTRA_HELM_ARGS_GRAFANA:="$({ ./tools/deployment/common/get-values-overrides.sh grafana;} 2> /dev/null)"} + #NOTE: Deploy command helm upgrade --install grafana ./grafana \ --namespace=osh-infra \ - 
--set pod.replicas.grafana=2 + --set pod.replicas.grafana=2 \ + ${OSH_INFRA_EXTRA_HELM_ARGS} \ + ${OSH_INFRA_EXTRA_HELM_ARGS_GRAFANA} #NOTE: Wait for deploy ./tools/deployment/common/wait-for-pods.sh osh-infra diff --git a/tools/deployment/osh-infra-monitoring/110-grafana.sh b/tools/deployment/osh-infra-monitoring/110-grafana.sh index 5cfc510a9..4b6a98ba9 100755 --- a/tools/deployment/osh-infra-monitoring/110-grafana.sh +++ b/tools/deployment/osh-infra-monitoring/110-grafana.sh @@ -19,7 +19,8 @@ set -xe #NOTE: Lint and package chart make grafana -: ${OSH_INFRA_EXTRA_HELM_ARGS_GRAFANA:="$(./tools/deployment/common/get-values-overrides.sh grafana)"} +FEATURE_GATES="calico,ceph,containers,coredns,elasticsearch,kubernetes,nginx,nodes,openstack,prometheus" +: ${OSH_INFRA_EXTRA_HELM_ARGS_GRAFANA:="$({ ./tools/deployment/common/get-values-overrides.sh grafana;} 2> /dev/null)"} #NOTE: Deploy command helm upgrade --install grafana ./grafana \