diff --git a/.zuul.yaml b/.zuul.yaml index d051c3d..671098e 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -2,8 +2,13 @@ name: ansible-role-carbon-base pre-run: tests/playbooks/pre.yaml run: tests/playbooks/run.yaml + post-run: tests/collect-logs.yaml roles: - zuul: openstack/ansible-role-carbon + - zuul: openstack/ansible-role-whisper + vars: + # Default to python2 to make testing easier + carbon_pip_extra_args: --install-option='--install-scripts=/usr/local/bin' --install-option='--install-lib=/usr/local/lib/python2.7/dist-packages' --install-option='--install-data=/usr/local/share/carbon' # Testing for carbon_install_method: pip - job: @@ -14,6 +19,8 @@ name: ansible-role-carbon-fedora-latest parent: ansible-role-carbon nodeset: fedora-latest + vars: + carbon_pip_extra_args: --install-option='--install-scripts=/usr/local/bin' --install-option='--install-lib=/usr/lib/python2.7/site-packages' --install-option='--install-data=/usr/local/share/carbon' - job: name: ansible-role-carbon-ubuntu-bionic @@ -31,11 +38,14 @@ parent: ansible-role-carbon-base vars: carbon_install_method: git + whisper_install_method: git - job: name: ansible-role-carbon-src-fedora-latest parent: ansible-role-carbon-src nodeset: fedora-latest + vars: + carbon_pip_extra_args: --install-option='--install-scripts=/usr/local/bin' --install-option='--install-lib=/usr/lib/python2.7/site-packages' --install-option='--install-data=/usr/local/share/carbon' - job: name: ansible-role-carbon-src-ubuntu-bionic @@ -56,7 +66,11 @@ - ansible-role-carbon-src-fedora-latest - ansible-role-carbon-src-ubuntu-bionic - ansible-role-carbon-src-ubuntu-xenial - - tox-linters + - tox-linters: + required-projects: + - openstack/ansible-role-whisper + vars: + tox_install_siblings: false gate: jobs: - ansible-role-carbon-fedora-latest @@ -65,4 +79,8 @@ - ansible-role-carbon-src-fedora-latest - ansible-role-carbon-src-ubuntu-bionic - ansible-role-carbon-src-ubuntu-xenial - - tox-linters + - tox-linters: + required-projects: + - openstack/ansible-role-whisper + vars: + tox_install_siblings: false diff --git a/defaults/main.yaml b/defaults/main.yaml index d4f00d3..d16bd39 100644 --- a/defaults/main.yaml +++ b/defaults/main.yaml @@ -12,7 +12,32 @@ # License for the specific language governing permissions and limitations # under the License. --- +# tasks/main.yaml +carbon_task_manager: + - pre + - install + - config + - service + +# tasks/pre.yaml +carbon_user_name: carbon +carbon_user_group: carbon +carbon_user_home: /var/lib/carbon + # tasks/config.yaml +carbon_file_carbon_conf_dest: /etc/carbon/carbon.conf +carbon_file_carbon_conf_group: "{{ carbon_user_group }}" +carbon_file_carbon_conf_mode: 0644 +carbon_file_carbon_conf_owner: "{{ carbon_user_name }}" +carbon_file_carbon_conf_src: etc/carbon/carbon.conf + +carbon_file_storage_schemas_conf_dest: /etc/carbon/storage-schemas.conf +carbon_file_storage_schemas_conf_group: "{{ carbon_user_group }}" +carbon_file_storage_schemas_conf_mode: 0644 +carbon_file_storage_schemas_conf_owner: "{{ carbon_user_name }}" +carbon_file_storage_schemas_conf_src: etc/carbon/storage-schemas.conf + +# tasks/install.yaml carbon_git_dest: "{{ ansible_user_dir }}/src/github.com/graphite-project/carbon" carbon_git_uri: https://github.com/graphite-project/carbon carbon_git_version: master @@ -21,8 +46,23 @@ carbon_git_version: master carbon_install_method: git carbon_pip_name: carbon -# carbon_pip_executable: # carbon_pip_extra_args: +# carbon_pip_executable: # carbon_pip_version: # carbon_pip_virtualenv_python: # carbon_pip_virtualenv: + +# tasks/service.yaml +carbon_file_carbon_cache_service_manage: true +carbon_file_carbon_cache_service_group: root +carbon_file_carbon_cache_service_owner: root + +carbon_file_carbon_cache_service_config_manage: true +carbon_file_carbon_cache_service_config_group: root +carbon_file_carbon_cache_service_config_owner: root + +carbon_service_carbon_cache_daemon_reload: true +carbon_service_carbon_cache_enabled: yes +carbon_service_carbon_cache_manage: true +carbon_service_carbon_cache_name: carbon-cache +carbon_service_carbon_cache_state: started diff --git a/tasks/config.yaml b/tasks/config.yaml new file mode 100644 index 0000000..9975fde --- /dev/null +++ b/tasks/config.yaml @@ -0,0 +1,44 @@ +# Copyright 2015 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +--- +- name: Create required directories. + become: yes + file: + group: "{{ carbon_user_group }}" + owner: "{{ carbon_user_name }}" + path: "{{ item }}" + state: directory + with_items: + - /etc/carbon + - /var/log/carbon + +- name: Install carbon configuration. + become: yes + template: + dest: "{{ carbon_file_carbon_conf_dest }}" + group: "{{ carbon_file_carbon_conf_group }}" + mode: "{{ carbon_file_carbon_conf_mode }}" + owner: "{{ carbon_file_carbon_conf_owner }}" + src: "{{ carbon_file_carbon_conf_src }}" + register: carbon_file_carbon_conf + +- name: Install storage-schemas configuration. + become: yes + template: + dest: "{{ carbon_file_storage_schemas_conf_dest }}" + group: "{{ carbon_file_storage_schemas_conf_group }}" + mode: "{{ carbon_file_storage_schemas_conf_mode }}" + owner: "{{ carbon_file_storage_schemas_conf_owner }}" + src: "{{ carbon_file_storage_schemas_conf_src }}" + register: carbon_file_storage_schemas_conf diff --git a/tasks/install.yaml b/tasks/install.yaml index c2c64cd..329e06f 100644 --- a/tasks/install.yaml +++ b/tasks/install.yaml @@ -24,12 +24,4 @@ state: installed when: carbon_install_method == 'git' or carbon_install_method == 'pip' -- name: Define carbon_pip_executable if needed. - set_fact: - carbon_pip_executable: pip3 - when: - - carbon_install_method == 'git' or carbon_install_method == 'pip' - - carbon_pip_virtualenv_python is not defined - - carbon_pip_executable is not defined - - include: "install/{{ carbon_install_method }}.yaml" diff --git a/tasks/main.yaml b/tasks/main.yaml index 6736e2d..dade4d5 100644 --- a/tasks/main.yaml +++ b/tasks/main.yaml @@ -15,4 +15,7 @@ - name: Include OS-specific variables. include_vars: "{{ ansible_os_family }}.yaml" -- include: install.yaml +- include: "{{ carbon_task }}.yaml" + with_items: "{{ carbon_task_manager }}" + loop_control: + loop_var: carbon_task diff --git a/tasks/pre.yaml b/tasks/pre.yaml new file mode 100644 index 0000000..a6cd153 --- /dev/null +++ b/tasks/pre.yaml @@ -0,0 +1,26 @@ +# Copyright 2016 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +--- +- name: Create group account. + become: yes + group: + name: "{{ carbon_user_group }}" + +- name: Create user account. + become: yes + user: + createhome: yes + group: "{{ carbon_user_group }}" + home: "{{ carbon_user_home }}" + name: "{{ carbon_user_name }}" diff --git a/tasks/service.yaml b/tasks/service.yaml new file mode 100644 index 0000000..62b2669 --- /dev/null +++ b/tasks/service.yaml @@ -0,0 +1,84 @@ +# Copyright 2015 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +--- +- name: Define carbon_file_carbon_cache_service_dest. + set_fact: + carbon_file_carbon_cache_service_dest: "{{ __carbon_file_carbon_cache_service_dest }}" + when: carbon_file_carbon_cache_service_dest is not defined + +- name: Define carbon_file_carbon_cache_service_mode. + set_fact: + carbon_file_carbon_cache_service_mode: "{{ __carbon_file_carbon_cache_service_mode }}" + when: carbon_file_carbon_cache_service_mode is not defined + +- name: Define carbon_file_carbon_cache_service_src. + set_fact: + carbon_file_carbon_cache_service_src: "{{ __carbon_file_carbon_cache_service_src }}" + when: carbon_file_carbon_cache_service_src is not defined + +- name: Define carbon_file_carbon_cache_service_config_dest. + set_fact: + carbon_file_carbon_cache_service_config_dest: "{{ __carbon_file_carbon_cache_service_config_dest }}" + when: carbon_file_carbon_cache_service_config_dest is not defined + +- name: Define carbon_file_carbon_cache_service_config_mode. + set_fact: + carbon_file_carbon_cache_service_config_mode: "{{ __carbon_file_carbon_cache_service_config_mode }}" + when: carbon_file_carbon_cache_service_config_mode is not defined + +- name: Define carbon_file_carbon_cache_service_config_src. + set_fact: + carbon_file_carbon_cache_service_config_src: "{{ __carbon_file_carbon_cache_service_config_src }}" + when: carbon_file_carbon_cache_service_config_src is not defined + +- name: Install carbon-cache service script into place. + become: yes + template: + dest: "{{ carbon_file_carbon_cache_service_dest }}" + group: "{{ carbon_file_carbon_cache_service_group }}" + mode: "{{ carbon_file_carbon_cache_service_mode }}" + owner: "{{ carbon_file_carbon_cache_service_owner }}" + src: "{{ carbon_file_carbon_cache_service_src }}" + register: carbon_file_carbon_cache_service + when: carbon_file_carbon_cache_service_manage + +- name: Create carbon-cache service config directory. + become: yes + file: + group: "{{ carbon_file_carbon_cache_service_config_group }}" + owner: "{{ carbon_file_carbon_cache_service_config_owner }}" + path: "{{ carbon_file_carbon_cache_service_dest }}.d" + state: directory + when: carbon_file_carbon_cache_service_config_manage + +- name: Install carbon-cache service config into place. + become: yes + template: + dest: "{{ carbon_file_carbon_cache_service_config_dest }}" + group: "{{ carbon_file_carbon_cache_service_config_group }}" + mode: "{{ carbon_file_carbon_cache_service_config_mode }}" + owner: "{{ carbon_file_carbon_cache_service_config_owner }}" + src: "{{ carbon_file_carbon_cache_service_config_src }}" + register: carbon_file_carbon_cache_service_config + when: carbon_file_carbon_cache_service_config_manage + +- name: Enable carbon-cache service. + become: yes + systemd: + daemon_reload: "{{ carbon_service_carbon_cache_daemon_reload }}" + enabled: "{{ carbon_service_carbon_cache_enabled }}" + name: "{{ carbon_service_carbon_cache_name }}" + state: "{{ carbon_service_carbon_cache_state }}" + register: carbon_service_carbon_cache + when: carbon_service_carbon_cache_manage diff --git a/templates/etc/carbon/carbon.conf b/templates/etc/carbon/carbon.conf new file mode 100644 index 0000000..ca01b4c --- /dev/null +++ b/templates/etc/carbon/carbon.conf @@ -0,0 +1,620 @@ +[cache] +# Configure carbon directories. +# +# OS environment variables can be used to tell carbon where graphite is +# installed, where to read configuration from and where to write data. +# +# GRAPHITE_ROOT - Root directory of the graphite installation. +# Defaults to ../ +# GRAPHITE_CONF_DIR - Configuration directory (where this file lives). +# Defaults to $GRAPHITE_ROOT/conf/ +# GRAPHITE_STORAGE_DIR - Storage directory for whisper/rrd/log/pid files. +# Defaults to $GRAPHITE_ROOT/storage/ +# +# To change other directory paths, add settings to this file. The following +# configuration variables are available with these default values: +# +# STORAGE_DIR = $GRAPHITE_STORAGE_DIR +# LOCAL_DATA_DIR = %(STORAGE_DIR)s/whisper/ +# WHITELISTS_DIR = %(STORAGE_DIR)s/lists/ +# CONF_DIR = %(STORAGE_DIR)s/conf/ +# LOG_DIR = %(STORAGE_DIR)s/log/ +# PID_DIR = %(STORAGE_DIR)s/ +# +# For FHS style directory structures, use: +# +# STORAGE_DIR = /var/lib/carbon/ +# CONF_DIR = /etc/carbon/ +# LOG_DIR = /var/log/carbon/ +# PID_DIR = /var/run/ +# +#LOCAL_DATA_DIR = /opt/graphite/storage/whisper/ + +STORAGE_DIR = /var/lib/carbon/ +CONF_DIR = /etc/carbon/ +LOG_DIR = /var/log/carbon/ +PID_DIR = /var/run/carbon/ + +# Specify the database library used to store metric data on disk. Each database +# may have configurable options to change the behaviour of how it writes to +# persistent storage. +# +# whisper - Fixed-size database, similar in design and purpose to RRD. This is +# the default storage backend for carbon and the most rigorously tested. +# +# ceres - Experimental alternative database that supports storing data in sparse +# files of arbitrary fixed-size resolutions. +DATABASE = whisper + +# Enable daily log rotation. If disabled, a new file will be opened whenever the log file path no +# longer exists (i.e. it is removed or renamed) +ENABLE_LOGROTATION = True + +# Specify the user to drop privileges to +# If this is blank carbon-cache runs as the user that invokes it +# This user must have write access to the local data directory +USER = + +# Limit the size of the cache to avoid swapping or becoming CPU bound. +# Sorts and serving cache queries gets more expensive as the cache grows. +# Use the value "inf" (infinity) for an unlimited cache size. +# value should be an integer number of metric datapoints. +MAX_CACHE_SIZE = inf + +# Limits the number of whisper update_many() calls per second, which effectively +# means the number of write requests sent to the disk. This is intended to +# prevent over-utilizing the disk and thus starving the rest of the system. +# When the rate of required updates exceeds this, then carbon's caching will +# take effect and increase the overall throughput accordingly. +MAX_UPDATES_PER_SECOND = 500 + +# If defined, this changes the MAX_UPDATES_PER_SECOND in Carbon when a +# stop/shutdown is initiated. This helps when MAX_UPDATES_PER_SECOND is +# relatively low and carbon has cached a lot of updates; it enables the carbon +# daemon to shutdown more quickly. +# MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000 + +# Softly limits the number of whisper files that get created each minute. +# Setting this value low (e.g. 50) is a good way to ensure that your carbon +# system will not be adversely impacted when a bunch of new metrics are +# sent to it. The trade off is that any metrics received in excess of this +# value will be silently dropped, and the whisper file will not be created +# until such point as a subsequent metric is received and fits within the +# defined rate limit. Setting this value high (like "inf" for infinity) will +# cause carbon to create the files quickly but at the risk of increased I/O. +MAX_CREATES_PER_MINUTE = 50 + +# Set the minimum timestamp resolution supported by this instance. This allows +# internal optimisations by overwriting points with equal truncated timestamps +# in order to limit the number of updates to the database. It defaults to one +# second. +MIN_TIMESTAMP_RESOLUTION = 1 + +# Set the minimum lag in seconds for a point to be written to the database +# in order to optimize batching. This means that each point will wait at least +# the duration of this lag before being written. Setting this to 0 disable the feature. +# This currently only works when using the timesorted write strategy. +# MIN_TIMESTAMP_LAG = 0 + +# Set the interface and port for the line (plain text) listener. Setting the +# interface to 0.0.0.0 listens on all interfaces. Port can be set to 0 to +# disable this listener if it is not required. +LINE_RECEIVER_INTERFACE = 0.0.0.0 +LINE_RECEIVER_PORT = 2003 + +# Set this to True to enable the UDP listener. By default this is off +# because it is very common to run multiple carbon daemons and managing +# another (rarely used) port for every carbon instance is not fun. +ENABLE_UDP_LISTENER = False +UDP_RECEIVER_INTERFACE = 0.0.0.0 +UDP_RECEIVER_PORT = 2003 + +# Set the interface and port for the pickle listener. Setting the interface to +# 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this +# listener if it is not required. +PICKLE_RECEIVER_INTERFACE = 0.0.0.0 +PICKLE_RECEIVER_PORT = 2004 + +# Set the interface and port for the protobuf listener. Setting the interface to +# 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this +# listener if it is not required. +# PROTOBUF_RECEIVER_INTERFACE = 0.0.0.0 +# PROTOBUF_RECEIVER_PORT = 2005 + +# Limit the number of open connections the receiver can handle as any time. +# Default is no limit. Setting up a limit for sites handling high volume +# traffic may be recommended to avoid running out of TCP memory or having +# thousands of TCP connections reduce the throughput of the service. +#MAX_RECEIVER_CONNECTIONS = inf + +# Per security concerns outlined in Bug #817247 the pickle receiver +# will use a more secure and slightly less efficient unpickler. +# Set this to True to revert to the old-fashioned insecure unpickler. +USE_INSECURE_UNPICKLER = False + +CACHE_QUERY_INTERFACE = 0.0.0.0 +CACHE_QUERY_PORT = 7002 + +# Set this to False to drop datapoints received after the cache +# reaches MAX_CACHE_SIZE. If this is True (the default) then sockets +# over which metrics are received will temporarily stop accepting +# data until the cache size falls below 95% MAX_CACHE_SIZE. +USE_FLOW_CONTROL = True + +# If enabled this setting is used to timeout metric client connection if no +# metrics have been sent in specified time in seconds +#METRIC_CLIENT_IDLE_TIMEOUT = None + +# By default, carbon-cache will log every whisper update and cache hit. +# This can be excessive and degrade performance if logging on the same +# volume as the whisper data is stored. +LOG_UPDATES = False +LOG_CREATES = False +LOG_CACHE_HITS = False +LOG_CACHE_QUEUE_SORTS = False + +# The thread that writes metrics to disk can use one of the following strategies +# determining the order in which metrics are removed from cache and flushed to +# disk. The default option preserves the same behavior as has been historically +# available in version 0.9.10. +# +# sorted - All metrics in the cache will be counted and an ordered list of +# them will be sorted according to the number of datapoints in the cache at the +# moment of the list's creation. Metrics will then be flushed from the cache to +# disk in that order. +# +# timesorted - All metrics in the list will be looked at and sorted according +# to the timestamp of there datapoints. The metric that were the least recently +# written will be written first. This is an hybrid strategy between max and +# sorted which is particularly adapted to sets of metrics with non-uniform +# resolutions. +# +# max - The writer thread will always pop and flush the metric from cache +# that has the most datapoints. This will give a strong flush preference to +# frequently updated metrics and will also reduce random file-io. Infrequently +# updated metrics may only ever be persisted to disk at daemon shutdown if +# there are a large number of metrics which receive very frequent updates OR if +# disk i/o is very slow. +# +# naive - Metrics will be flushed from the cache to disk in an unordered +# fashion. This strategy may be desirable in situations where the storage for +# whisper files is solid state, CPU resources are very limited or deference to +# the OS's i/o scheduler is expected to compensate for the random write +# pattern. +# +CACHE_WRITE_STRATEGY = sorted + +# On some systems it is desirable for whisper to write synchronously. +# Set this option to True if you'd like to try this. Basically it will +# shift the onus of buffering writes from the kernel into carbon's cache. +WHISPER_AUTOFLUSH = False + +# By default new Whisper files are created pre-allocated with the data region +# filled with zeros to prevent fragmentation and speed up contiguous reads and +# writes (which are common). Enabling this option will cause Whisper to create +# the file sparsely instead. Enabling this option may allow a large increase of +# MAX_CREATES_PER_MINUTE but may have longer term performance implications +# depending on the underlying storage configuration. +# WHISPER_SPARSE_CREATE = False + +# Only beneficial on linux filesystems that support the fallocate system call. +# It maintains the benefits of contiguous reads/writes, but with a potentially +# much faster creation speed, by allowing the kernel to handle the block +# allocation and zero-ing. Enabling this option may allow a large increase of +# MAX_CREATES_PER_MINUTE. If enabled on an OS or filesystem that is unsupported +# this option will gracefully fallback to standard POSIX file access methods. +WHISPER_FALLOCATE_CREATE = True + +# Enabling this option will cause Whisper to lock each Whisper file it writes +# to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when +# multiple carbon-cache daemons are writing to the same files. +# WHISPER_LOCK_WRITES = False + +# On systems which has a large number of metrics, an amount of Whisper write(2)'s +# pageback sometimes cause disk thrashing due to memory shortage, so that abnormal +# disk reads occur. Enabling this option makes it possible to decrease useless +# page cache memory by posix_fadvise(2) with POSIX_FADVISE_RANDOM option. +# WHISPER_FADVISE_RANDOM = False + +# By default all nodes stored in Ceres are cached in memory to improve the +# throughput of reads and writes to underlying slices. Turning this off will +# greatly reduce memory consumption for databases with millions of metrics, at +# the cost of a steep increase in disk i/o, approximately an extra two os.stat +# calls for every read and write. Reasons to do this are if the underlying +# storage can handle stat() with practically zero cost (SSD, NVMe, zRAM). +# Valid values are: +# all - all nodes are cached +# none - node caching is disabled +# CERES_NODE_CACHING_BEHAVIOR = all + +# Ceres nodes can have many slices and caching the right ones can improve +# performance dramatically. Note that there are many trade-offs to tinkering +# with this, and unless you are a ceres developer you *really* should not +# mess with this. Valid values are: +# latest - only the most recent slice is cached +# all - all slices are cached +# none - slice caching is disabled +# CERES_SLICE_CACHING_BEHAVIOR = latest + +# If a Ceres node accumulates too many slices, performance can suffer. +# This can be caused by intermittently reported data. To mitigate +# slice fragmentation there is a tolerance for how much space can be +# wasted within a slice file to avoid creating a new one. That tolerance +# level is determined by MAX_SLICE_GAP, which is the number of consecutive +# null datapoints allowed in a slice file. +# If you set this very low, you will waste less of the *tiny* bit disk space +# that this feature wastes, and you will be prone to performance problems +# caused by slice fragmentation, which can be pretty severe. +# If you set this really high, you will waste a bit more disk space (each +# null datapoint wastes 8 bytes, but keep in mind your filesystem's block +# size). If you suffer slice fragmentation issues, you should increase this or +# run the ceres-maintenance defrag plugin more often. However you should not +# set it to be huge because then if a large but allowed gap occurs it has to +# get filled in, which means instead of a simple 8-byte write to a new file we +# could end up doing an (8 * MAX_SLICE_GAP)-byte write to the latest slice. +# CERES_MAX_SLICE_GAP = 80 + +# Enabling this option will cause Ceres to lock each Ceres file it writes to +# to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when +# multiple carbon-cache daemons are writing to the same files. +# CERES_LOCK_WRITES = False + +# Set this to True to enable whitelisting and blacklisting of metrics in +# CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is +# missing or empty, all metrics will pass through +# USE_WHITELIST = False + +# By default, carbon itself will log statistics (such as a count, +# metricsReceived) with the top level prefix of 'carbon' at an interval of 60 +# seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation +# CARBON_METRIC_PREFIX = carbon +# CARBON_METRIC_INTERVAL = 60 + +# Enable AMQP if you want to receve metrics using an amqp broker +# ENABLE_AMQP = False + +# Verbose means a line will be logged for every metric received +# useful for testing +# AMQP_VERBOSE = False + +# AMQP_HOST = localhost +# AMQP_PORT = 5672 +# AMQP_VHOST = / +# AMQP_USER = guest +# AMQP_PASSWORD = guest +# AMQP_EXCHANGE = graphite +# AMQP_METRIC_NAME_IN_BODY = False + +# The manhole interface allows you to SSH into the carbon daemon +# and get a python interpreter. BE CAREFUL WITH THIS! If you do +# something like time.sleep() in the interpreter, the whole process +# will sleep! This is *extremely* helpful in debugging, assuming +# you are familiar with the code. If you are not, please don't +# mess with this, you are asking for trouble :) +# +# ENABLE_MANHOLE = False +# MANHOLE_INTERFACE = 127.0.0.1 +# MANHOLE_PORT = 7222 +# MANHOLE_USER = admin +# MANHOLE_PUBLIC_KEY = ssh-rsa AAAAB3NzaC1yc2EAAAABiwAaAIEAoxN0sv/e4eZCPpi3N3KYvyzRaBaMeS2RsOQ/cDuKv11dlNzVeiyc3RFmCv5Rjwn/lQ79y0zyHxw67qLyhQ/kDzINc4cY41ivuQXm2tPmgvexdrBv5nsfEpjs3gLZfJnyvlcVyWK/lId8WUvEWSWHTzsbtmXAF2raJMdgLTbQ8wE= + +# Patterns for all of the metrics this machine will store. Read more at +# http://en.wikipedia.org/wiki/Advanced_Message_Queuing_Protocol#Bindings +# +# Example: store all sales, linux servers, and utilization metrics +# BIND_PATTERNS = sales.#, servers.linux.#, #.utilization +# +# Example: store everything +# BIND_PATTERNS = # + +# URL of graphite-web instance, this is used to add incoming series to the tag database +# GRAPHITE_URL = http://127.0.0.1:80 + +# Tag support, when enabled carbon will make HTTP calls to graphite-web to update the tag index +# ENABLE_TAGS = True + +# Tag update interval, this specifies how frequently updates to existing series will trigger +# an update to the tag index, the default setting is once every 100 updates +# TAG_UPDATE_INTERVAL = 100 + +# Tag hash filenames, this specifies whether tagged metric filenames should use the hash of the metric name +# or a human-readable name, using hashed names avoids issues with path length when using a large number of tags +# TAG_HASH_FILENAMES = True + +# Tag batch size, this specifies the maximum number of series to be sent to graphite-web in a single batch +# TAG_BATCH_SIZE = 100 + +# Tag queue size, this specifies the maximum number of series to be queued for sending to graphite-web +# There are separate queues for new series and for updates to existing series +# TAG_QUEUE_SIZE = 10000 + +# Set to enable Sentry.io exception monitoring. +# RAVEN_DSN='YOUR_DSN_HERE'. + +# To configure special settings for the carbon-cache instance 'b', uncomment this: +#[cache:b] +#LINE_RECEIVER_PORT = 2103 +#PICKLE_RECEIVER_PORT = 2104 +#CACHE_QUERY_PORT = 7102 +# and any other settings you want to customize, defaults are inherited +# from the [cache] section. +# You can then specify the --instance=b option to manage this instance +# +# In order to turn off logging of successful connections for the line +# receiver, set this to False +# LOG_LISTENER_CONN_SUCCESS = True + +[relay] +LINE_RECEIVER_INTERFACE = 0.0.0.0 +LINE_RECEIVER_PORT = 2013 +PICKLE_RECEIVER_INTERFACE = 0.0.0.0 +PICKLE_RECEIVER_PORT = 2014 + +# Carbon-relay has several options for metric routing controlled by RELAY_METHOD +# +# Use relay-rules.conf to route metrics to destinations based on pattern rules +#RELAY_METHOD = rules +# +# Use consistent-hashing for even distribution of metrics between destinations +#RELAY_METHOD = consistent-hashing +# +# Use consistent-hashing but take into account an aggregation-rules.conf shared +# by downstream carbon-aggregator daemons. This will ensure that all metrics +# that map to a given aggregation rule are sent to the same carbon-aggregator +# instance. +# Enable this for carbon-relays that send to a group of carbon-aggregators +#RELAY_METHOD = aggregated-consistent-hashing +# +# You can also use fast-hashing and fast-aggregated-hashing which are in O(1) +# and will always redirect the metrics to the same destination but do not try +# to minimize rebalancing when the list of destinations is changing. +RELAY_METHOD = rules + +# If you use consistent-hashing you can add redundancy by replicating every +# datapoint to more than one machine. +REPLICATION_FACTOR = 1 + +# For REPLICATION_FACTOR >=2, set DIVERSE_REPLICAS to True to guarantee replicas +# across distributed hosts. With this setting disabled, it's possible that replicas +# may be sent to different caches on the same host. This has been the default +# behavior since introduction of 'consistent-hashing' relay method. +# Note that enabling this on an existing pre-0.9.14 cluster will require rebalancing +# your metrics across the cluster nodes using a tool like Carbonate. +#DIVERSE_REPLICAS = True + +# This is a list of carbon daemons we will send any relayed or +# generated metrics to. The default provided would send to a single +# carbon-cache instance on the default port. However if you +# use multiple carbon-cache instances then it would look like this: +# +# DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b +# +# The general form is IP:PORT:INSTANCE where the :INSTANCE part is +# optional and refers to the "None" instance if omitted. +# +# Note that if the destinations are all carbon-caches then this should +# exactly match the webapp's CARBONLINK_HOSTS setting in terms of +# instances listed (order matters!). +# +# If using RELAY_METHOD = rules, all destinations used in relay-rules.conf +# must be defined in this list +DESTINATIONS = 127.0.0.1:2004 + +# This define the protocol to use to contact the destination. It can be +# set to one of "line", "pickle", "udp" and "protobuf". This list can be +# extended with CarbonClientFactory plugins and defaults to "pickle". +# DESTINATION_PROTOCOL = pickle + +# When using consistent hashing it sometime makes sense to make +# the ring dynamic when you don't want to loose points when a +# single destination is down. Replication is an answer to that +# but it can be quite expensive. +# DYNAMIC_ROUTER = False + +# Controls the number of connection attempts before marking a +# destination as down. We usually do one connection attempt per +# second. +# DYNAMIC_ROUTER_MAX_RETRIES = 5 + +# This is the maximum number of datapoints that can be queued up +# for a single destination. Once this limit is hit, we will +# stop accepting new data if USE_FLOW_CONTROL is True, otherwise +# we will drop any subsequently received datapoints. +MAX_QUEUE_SIZE = 10000 + +# This defines the maximum "message size" between carbon daemons. If +# your queue is large, setting this to a lower number will cause the +# relay to forward smaller discrete chunks of stats, which may prevent +# overloading on the receiving side after a disconnect. +MAX_DATAPOINTS_PER_MESSAGE = 500 + +# Limit the number of open connections the receiver can handle as any time. +# Default is no limit. Setting up a limit for sites handling high volume +# traffic may be recommended to avoid running out of TCP memory or having +# thousands of TCP connections reduce the throughput of the service. +#MAX_RECEIVER_CONNECTIONS = inf + +# Specify the user to drop privileges to +# If this is blank carbon-relay runs as the user that invokes it +# USER = + +# This is the percentage that the queue must be empty before it will accept +# more messages. For a larger site, if the queue is very large it makes sense +# to tune this to allow for incoming stats. So if you have an average +# flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense +# to allow stats to start flowing when you've cleared the queue to 95% since +# you should have space to accommodate the next minute's worth of stats +# even before the relay incrementally clears more of the queue +QUEUE_LOW_WATERMARK_PCT = 0.8 + +# To allow for batch efficiency from the pickle protocol and to benefit from +# other batching advantages, all writes are deferred by putting them into a queue, +# and then the queue is flushed and sent a small fraction of a second later. +TIME_TO_DEFER_SENDING = 0.0001 + +# Set this to False to drop datapoints when any send queue (sending datapoints +# to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the +# default) then sockets over which metrics are received will temporarily stop accepting +# data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE. +USE_FLOW_CONTROL = True + +# If enabled this setting is used to timeout metric client connection if no +# metrics have been sent in specified time in seconds +#METRIC_CLIENT_IDLE_TIMEOUT = None + +# Set this to True to enable whitelisting and blacklisting of metrics in +# CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is +# missing or empty, all metrics will pass through +# USE_WHITELIST = False + +# By default, carbon itself will log statistics (such as a count, +# metricsReceived) with the top level prefix of 'carbon' at an interval of 60 +# seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation +# CARBON_METRIC_PREFIX = carbon +# CARBON_METRIC_INTERVAL = 60 +# +# In order to turn off logging of successful connections for the line +# receiver, set this to False +# LOG_LISTENER_CONN_SUCCESS = True + +# If you're connecting from the relay to a destination that's over the +# internet or similarly iffy connection, a backlog can develop because +# of internet weather conditions, e.g. acks getting lost or similar issues. +# To deal with that, you can enable USE_RATIO_RESET which will let you +# re-set the connection to an individual destination. Defaults to being off. +USE_RATIO_RESET=False + +# When there is a small number of stats flowing, it's not desirable to +# perform any actions based on percentages - it's just too "twitchy". +MIN_RESET_STAT_FLOW=1000 + +# When the ratio of stats being sent in a reporting interval is far +# enough from 1.0, we will disconnect the socket and reconnecto to +# clear out queued stats. The default ratio of 0.9 indicates that 10% +# of stats aren't being delivered within one CARBON_METRIC_INTERVAL +# (default of 60 seconds), which can lead to a queue backup. Under +# some circumstances re-setting the connection can fix this, so +# set this according to your tolerance, and look in the logs for +# "resetConnectionForQualityReasons" to observe whether this is kicking +# in when your sent queue is building up. +MIN_RESET_RATIO=0.9 + +# The minimum time between resets. When a connection is re-set, we +# need to wait before another reset is performed. +# (2*CARBON_METRIC_INTERVAL) + 1 second is the minimum time needed +# before stats for the new connection will be available. Setting this +# below (2*CARBON_METRIC_INTERVAL) + 1 second will result in a lot of +# reset connections for no good reason. +MIN_RESET_INTERVAL=121 + +# Enable TCP Keep Alive (http://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html). +# Default settings will send a probe every 30s. Default is False. +# TCP_KEEPALIVE=True + +[aggregator] +LINE_RECEIVER_INTERFACE = 0.0.0.0 +LINE_RECEIVER_PORT = 2023 + +PICKLE_RECEIVER_INTERFACE = 0.0.0.0 +PICKLE_RECEIVER_PORT = 2024 + +# If set true, metric received will be forwarded to DESTINATIONS in addition to +# the output of the aggregation rules. If set false the carbon-aggregator will +# only ever send the output of aggregation. +FORWARD_ALL = True + +# Filenames of the configuration files to use for this instance of aggregator. +# Filenames are relative to CONF_DIR. +# +# AGGREGATION_RULES = aggregation-rules.conf +# REWRITE_RULES = rewrite-rules.conf + +# This is a list of carbon daemons we will send any relayed or +# generated metrics to. The default provided would send to a single +# carbon-cache instance on the default port. However if you +# use multiple carbon-cache instances then it would look like this: +# +# DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b +# +# The format is comma-delimited IP:PORT:INSTANCE where the :INSTANCE part is +# optional and refers to the "None" instance if omitted. +# +# Note that if the destinations are all carbon-caches then this should +# exactly match the webapp's CARBONLINK_HOSTS setting in terms of +# instances listed (order matters!). +DESTINATIONS = 127.0.0.1:2004 + +# If you want to add redundancy to your data by replicating every +# datapoint to more than one machine, increase this. +REPLICATION_FACTOR = 1 + +# This is the maximum number of datapoints that can be queued up +# for a single destination. Once this limit is hit, we will +# stop accepting new data if USE_FLOW_CONTROL is True, otherwise +# we will drop any subsequently received datapoints. +MAX_QUEUE_SIZE = 10000 + +# Set this to False to drop datapoints when any send queue (sending datapoints +# to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the +# default) then sockets over which metrics are received will temporarily stop accepting +# data until the send queues fall below 80% MAX_QUEUE_SIZE. +USE_FLOW_CONTROL = True + +# If enabled this setting is used to timeout metric client connection if no +# metrics have been sent in specified time in seconds +#METRIC_CLIENT_IDLE_TIMEOUT = None + +# This defines the maximum "message size" between carbon daemons. +# You shouldn't need to tune this unless you really know what you're doing. +MAX_DATAPOINTS_PER_MESSAGE = 500 + +# This defines how many datapoints the aggregator remembers for +# each metric. Aggregation only happens for datapoints that fall in +# the past MAX_AGGREGATION_INTERVALS * intervalSize seconds. +MAX_AGGREGATION_INTERVALS = 5 + +# Limit the number of open connections the receiver can handle as any time. +# Default is no limit. Setting up a limit for sites handling high volume +# traffic may be recommended to avoid running out of TCP memory or having +# thousands of TCP connections reduce the throughput of the service. +#MAX_RECEIVER_CONNECTIONS = inf + +# By default (WRITE_BACK_FREQUENCY = 0), carbon-aggregator will write back +# aggregated data points once every rule.frequency seconds, on a per-rule basis. +# Set this (WRITE_BACK_FREQUENCY = N) to write back all aggregated data points +# every N seconds, independent of rule frequency. This is useful, for example, +# to be able to query partially aggregated metrics from carbon-cache without +# having to first wait rule.frequency seconds. +# WRITE_BACK_FREQUENCY = 0 + +# Set this to True to enable whitelisting and blacklisting of metrics in +# CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is +# missing or empty, all metrics will pass through +# USE_WHITELIST = False + +# By default, carbon itself will log statistics (such as a count, +# metricsReceived) with the top level prefix of 'carbon' at an interval of 60 +# seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation +# CARBON_METRIC_PREFIX = carbon +# CARBON_METRIC_INTERVAL = 60 + +# In order to turn off logging of successful connections for the line +# receiver, set this to False +# LOG_LISTENER_CONN_SUCCESS = True + +# In order to turn off logging of metrics with no corresponding +# aggregation rules receiver, set this to False +# LOG_AGGREGATOR_MISSES = False + +# Specify the user to drop privileges to +# If this is blank carbon-aggregator runs as the user that invokes it +# USER = + +# Part of the code, and particularly aggregator rules, need +# to cache metric names. To avoid leaking too much memory you +# can tweak the size of this cache. The default allow for 1M +# different metrics per rule (~200MiB). +# CACHE_METRIC_NAMES_MAX=1000000 + +# You can optionally set a ttl to this cache. +# CACHE_METRIC_NAMES_TTL=600 diff --git a/templates/etc/carbon/storage-schemas.conf b/templates/etc/carbon/storage-schemas.conf new file mode 100644 index 0000000..720ef91 --- /dev/null +++ b/templates/etc/carbon/storage-schemas.conf @@ -0,0 +1,7 @@ +[carbon] +pattern = ^carbon\. +retentions = 60:90d + +[default_1min_for_1day] +pattern = .* +retentions = 60s:1d diff --git a/templates/etc/systemd/system/carbon-cache.service b/templates/etc/systemd/system/carbon-cache.service new file mode 100644 index 0000000..d72fa7b --- /dev/null +++ b/templates/etc/systemd/system/carbon-cache.service @@ -0,0 +1,14 @@ +[Unit] +Description=Graphite Carbon Cache +After=syslog.target network.target + +[Service] +Type=forking +ExecStart={{ carbon_pip_virtualenv | default('/usr/local') }}/bin/carbon-cache.py --config=/etc/carbon/carbon.conf start +Group=carbon +PIDFile=/var/run/carbon/carbon-cache-a.pid +RuntimeDirectory=carbon +User=carbon + +[Install] +WantedBy=multi-user.target diff --git a/templates/etc/systemd/system/carbon-cache.service.d/override.conf b/templates/etc/systemd/system/carbon-cache.service.d/override.conf new file mode 100644 index 0000000..602ac8e --- /dev/null +++ b/templates/etc/systemd/system/carbon-cache.service.d/override.conf @@ -0,0 +1,3 @@ +# This file is generated by Ansible +# DO NOT EDIT THIS FILE BY HAND -- YOUR CHANGES WILL BE OVERWRITTEN +# diff --git a/tests/collect-logs.yaml b/tests/collect-logs.yaml new file mode 100644 index 0000000..1bf8e56 --- /dev/null +++ b/tests/collect-logs.yaml @@ -0,0 +1,32 @@ +- hosts: all + tasks: + - name: Ensure journald logs directory exists + file: + path: "~/logs/var/log/journal" + state: directory + + - name: Collect journald logs + shell: "sudo journalctl -u {{ item }}.service | tee ~/logs/var/log/journal/{{ item }}.service.log" + args: + creates: "~/logs/var/log/journal/{{ item }}.service.log" + with_items: + - carbon-cache + + - name: Collect journalctl log files + synchronize: + dest: "{{ zuul.executor.log_root }}" + mode: pull + src: ~/logs + verify_host: true + + - name: Collect statsd log files + synchronize: + dest: "{{ zuul.executor.log_root }}/logs" + mode: pull + rsync_opts: + - "--relative" + src: "{{ item }}" + verify_host: true + with_items: + - /etc/carbon + - /var/log/carbon diff --git a/tests/playbooks/run.yaml b/tests/playbooks/run.yaml index 3734083..f1acbda 100644 --- a/tests/playbooks/run.yaml +++ b/tests/playbooks/run.yaml @@ -17,6 +17,7 @@ rolename: ansible-role-carbon roles: + - ansible-role-whisper - "{{ rolename }}" post_tasks: @@ -32,3 +33,36 @@ - carbon_git_dest_stat.stat.exists - carbon_git_dest_stat.stat.isdir when: carbon_install_method == 'git' + + - name: Register /etc/systemd/system/carbon-cache.service + stat: + path: /etc/systemd/system/carbon-cache.service + register: _carbon_cache_service_systemd_stat + + - name: Assert _carbon_cache_service_systemd_stat tests. + assert: + that: + - _carbon_cache_service_systemd_stat.stat.exists + - _carbon_cache_service_systemd_stat.stat.isreg + - _carbon_cache_service_systemd_stat.stat.pw_name == 'root' + - _carbon_cache_service_systemd_stat.stat.gr_name == 'root' + - _carbon_cache_service_systemd_stat.stat.mode == '0644' + + - name: Register /etc/systemd/system/carbon.cache.service.d/override.conf + stat: + path: /etc/systemd/system/carbon-cache.service.d/override.conf + register: _carbon_cache_service_config_stat + + - name: Assert _carbon_cache_service_config_stat tests. + assert: + that: + - _carbon_cache_service_config_stat.stat.exists + - _carbon_cache_service_config_stat.stat.isreg + - _carbon_cache_service_config_stat.stat.pw_name == 'root' + - _carbon_cache_service_config_stat.stat.gr_name == 'root' + - _carbon_cache_service_config_stat.stat.mode == '0644' + + - name: Ensure carbon-cache is running. + become: yes + shell: /usr/sbin/service carbon-cache status + tags: skip_ansible_lint diff --git a/tox.ini b/tox.ini index 03ba7dc..f32d95c 100644 --- a/tox.ini +++ b/tox.ini @@ -30,7 +30,7 @@ commands = flake8 # Ansible Lint Check bash -c "find . -not -path '*/\.*' -type f -regex '.*.y[a]?ml' -print0 | \ - xargs -t -n1 -0 ansible-lint" + xargs -t -n1 -0 ansible-lint -x306" # Ansible Syntax Check bash -c "find tests -type f -regex '.*.y[a]?ml' -print | xargs -t -n1 \ ansible-playbook --syntax-check -i tests/inventory \ diff --git a/vars/Debian.yaml b/vars/Debian.yaml index c60dce4..fa5cc9d 100644 --- a/vars/Debian.yaml +++ b/vars/Debian.yaml @@ -13,3 +13,11 @@ # under the License. --- __carbon_build_depends: [] + +__carbon_file_carbon_cache_service_dest: /etc/systemd/system/carbon-cache.service +__carbon_file_carbon_cache_service_mode: 0644 +__carbon_file_carbon_cache_service_src: etc/systemd/system/carbon-cache.service + +__carbon_file_carbon_cache_service_config_dest: /etc/systemd/system/carbon-cache.service.d/override.conf +__carbon_file_carbon_cache_service_config_mode: 0644 +__carbon_file_carbon_cache_service_config_src: etc/systemd/system/carbon-cache.service.d/override.conf diff --git a/vars/RedHat.yaml b/vars/RedHat.yaml index c60dce4..fa5cc9d 100644 --- a/vars/RedHat.yaml +++ b/vars/RedHat.yaml @@ -13,3 +13,11 @@ # under the License. --- __carbon_build_depends: [] + +__carbon_file_carbon_cache_service_dest: /etc/systemd/system/carbon-cache.service +__carbon_file_carbon_cache_service_mode: 0644 +__carbon_file_carbon_cache_service_src: etc/systemd/system/carbon-cache.service + +__carbon_file_carbon_cache_service_config_dest: /etc/systemd/system/carbon-cache.service.d/override.conf +__carbon_file_carbon_cache_service_config_mode: 0644 +__carbon_file_carbon_cache_service_config_src: etc/systemd/system/carbon-cache.service.d/override.conf