Prometheus: Prune large unused time series metrics

This begins to drop metrics from Prometheus scrape configurations.
The dropped metrics are not currently used by any service that
interacts with Prometheus, nor by any of the default alerting
rules. Dropping them reduces Prometheus' resource usage, because
it reduces the total number of time series that Prometheus
ingests and analyzes.

Change-Id: Ia09ddd482da0119167a19e7e4b092879b672c2ec
This commit is contained in:
Steve Wilkerson 2018-08-20 13:29:40 -05:00
parent 55424bacfd
commit 2e4db10e9b

@ -568,6 +568,171 @@ conf:
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# Drop cAdvisor series that are not consumed downstream, to reduce the
# number of time series ingested from this scrape job. Each rule matches
# on the metric name (__name__) and discards the sample.
#
# NOTE(review): container_fs_reads_bytes_total is NOT dropped here, although
# its container_fs_writes_bytes_total counterpart is -- confirm whether that
# asymmetry is intentional before adding or relying on it.
metric_relabel_configs:
  # Network / task / load series.
  - source_labels:
      - __name__
    regex: 'container_network_tcp_usage_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_tasks_state'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_network_udp_usage_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_failures_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_cpu_load_average_10s'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_cpu_system_seconds_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_cpu_user_seconds_total'
    action: drop
  # Filesystem series.
  - source_labels:
      - __name__
    regex: 'container_fs_inodes_free'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_inodes_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_io_current'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_io_time_seconds_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_io_time_weighted_seconds_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_read_seconds_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_reads_merged_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_reads_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_sector_reads_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_sector_writes_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_write_seconds_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_writes_bytes_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_writes_merged_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_fs_writes_total'
    action: drop
  # Container lifecycle and memory series.
  - source_labels:
      - __name__
    regex: 'container_last_seen'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_cache'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_failcnt'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_max_usage_bytes'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_rss'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_swap'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_memory_usage_bytes'
    action: drop
  # Network error / packet series.
  - source_labels:
      - __name__
    regex: 'container_network_receive_errors_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_network_receive_packets_dropped_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_network_receive_packets_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_network_transmit_errors_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_network_transmit_packets_dropped_total'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_network_transmit_packets_total'
    action: drop
  # Container spec and start-time series.
  - source_labels:
      - __name__
    regex: 'container_spec_cpu_period'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_spec_cpu_shares'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_spec_memory_limit_bytes'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_spec_memory_reservation_limit_bytes'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_spec_memory_swap_limit_bytes'
    action: drop
  - source_labels:
      - __name__
    regex: 'container_start_time_seconds'
    action: drop
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
@ -608,6 +773,35 @@ conf:
- __meta_kubernetes_endpoint_port_name
action: keep
regex: default;kubernetes;https
# Discard API server series by metric name (__name__) so they are not
# ingested from this scrape job.
metric_relabel_configs:
  # Histogram bucket series.
  - action: drop
    source_labels: [__name__]
    regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
  - action: drop
    source_labels: [__name__]
    regex: 'rest_client_request_latency_seconds_bucket'
  - action: drop
    source_labels: [__name__]
    regex: 'apiserver_response_sizes_bucket'
  - action: drop
    source_labels: [__name__]
    regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
  # Admission controller latency count and sum series.
  - action: drop
    source_labels: [__name__]
    regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
  - action: drop
    source_labels: [__name__]
    regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
  # Request latency summary series.
  - action: drop
    source_labels: [__name__]
    regex: 'apiserver_request_latencies_summary'
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured