tripleo-heat-templates/deployment/metrics/collectd-container-puppet.yaml

717 lines
29 KiB
YAML

heat_template_version: rocky
description: >
Containerized collectd service
parameters:
ContainerCollectdImage:
description: image
type: string
ContainerCollectdConfigImage:
description: The container image to use for the collectd config_volume
type: string
EndpointMap:
default: {}
description: Mapping of service endpoint -> protocol. Typically set
via parameter_defaults in the resource registry.
type: json
ServiceData:
default: {}
description: Dictionary packing service data
type: json
ServiceNetMap:
default: {}
description: Mapping of service_name -> network name. Typically set
via parameter_defaults in the resource registry. This
mapping overrides those in ServiceNetMapDefaults.
type: json
DefaultPasswords:
default: {}
type: json
RoleName:
default: ''
description: Role name on which the service is applied
type: string
RoleParameters:
default: {}
description: Parameters specific to the role
type: json
GnocchiPassword:
type: string
description: The password for the gnocchi service and db account.
hidden: true
KeystoneRegion:
type: string
description: Keystone region for endpoint
default: 'regionOne'
MetricsQdrPort:
default: 5666
description: Service name or port number on which the qdrouterd will accept
connections.
type: number
MetricsQdrUsername:
default: 'guest'
description: Username which should be used to authenticate to the deployed
qdrouterd.
type: string
MetricsQdrPassword:
default: 'guest'
description: Password which should be used to authenticate to the deployed
qdrouterd.
type: string
hidden: true
MonitoringSubscriptionCollectd:
default: 'overcloud-collectd'
type: string
CollectdConnectionType:
default: 'gnocchi'
description: Define which write plugin should collectd use. Currently
supported are 'amqp1', 'network' and 'gnocchi'.
type: string
CollectdDefaultPollingInterval:
default: 120
type: number
description: >
Controls how often registered read functions are called and with that
the resolution of the collected data. This value can be overridden per
plugin(per role) by setting "::collectd::plugin::<plugin_name>::interval"
key in ExtraConfig(<role_name>ExtraConfig).
CollectdDefaultPlugins:
default:
- cpu
- df
- disk
- hugepages
- interface
- load
- memory
- processes
- unixsock
- uptime
type: comma_delimited_list
description: >
List of collectd plugins to activate on all overcloud hosts. See
the documentation for the puppet-collectd module for a list plugins
supported by the module (https://github.com/voxpupuli/puppet-collectd).
Set this key to override the default list of plugins. Use
CollectdExtraPlugins if you want to load additional plugins without
overriding the defaults.
CollectdExtraPlugins:
default: []
type: comma_delimited_list
description: >
List of collectd plugins to activate on all overcloud hosts. See
the documentation for the puppet-collectd module for a list plugins
supported by the module (https://github.com/voxpupuli/puppet-collectd).
Set this key to load plugins in addition to those in
CollectdDefaultPlugins.
CollectdServer:
type: string
description: >
Address of remote collectd server to which we will send
metrics.
default: ''
CollectdServerPort:
type: number
default: 25826
description: >
Port on remote collectd server to which we will send
metrics.
CollectdUsername:
type: string
description: >
Username for authenticating to the remote collectd server. The default
is to not configure any authentication.
default: ''
CollectdPassword:
type: string
hidden: true
description: >
Password for authenticating to the remote collectd server. The
default is to not configure any authentication.
default: ''
CollectdSecurityLevel:
type: string
description: >
Security level setting for remote collectd connection. If it is
set to Sign or Encrypt the CollectdPassword and CollectdUsername
parameters need to be set.
default: 'None'
constraints:
- allowed_values:
- None
- Sign
- Encrypt
CollectdGnocchiAuthMode:
type: string
description: >
Type of authentication Gnocchi server is using. Supported values are
'basic' and 'keystone'.
default: 'keystone'
CollectdGnocchiProtocol:
type: string
description: API protocol Gnocchi server is using.
default: 'http'
CollectdGnocchiServer:
type: string
description: >
The name or address of a gnocchi endpoint to which we should
send metrics.
default: nil
CollectdGnocchiPort:
type: number
description: The port to which we will connect on the Gnocchi server.
default: 8041
CollectdGnocchiUser:
type: string
description: >
Username for authenticating to the remote Gnocchi server using simple
authentication.
default: nil
CollectdGnocchiKeystoneAuthUrl:
type: string
description: Keystone endpoint URL to authenticate to.
default: nil
CollectdGnocchiKeystoneUserName:
type: string
description: Username for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneUserId:
type: string
description: User ID for authenticating to Keystone.
default: nil
CollectdGnocchiKeystonePassword:
type: string
description: Password for authenticating to Keystone
default: nil
hidden: true
CollectdGnocchiKeystoneProjectId:
type: string
description: Project ID for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneProjectName:
type: string
description: Project name for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneUserDomainId:
type: string
description: User domain ID for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneUserDomainName:
type: string
description: User domain name for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneProjectDomainId:
type: string
description: Project domain ID for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneProjectDomainName:
type: string
description: Project domain name for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneRegionName:
type: string
description: Region name for authenticating to Keystone.
default: nil
CollectdGnocchiKeystoneInterface:
type: string
description: Type of Keystone endpoint to authenticate to.
default: nil
CollectdGnocchiKeystoneEndpoint:
type: string
description: >
Explicitly state Gnocchi server URL if you want to override
Keystone value
default: nil
CollectdGnocchiResourceType:
type: string
description: >
Default resource type created by the collectd-gnocchi plugin in Gnocchi
to store hosts.
default: 'collectd'
CollectdGnocchiBatchSize:
type: number
description: Minimum number of values Gnocchi should batch.
default: 10
CollectdAmqpHost:
type: string
description: Hostname or IP address of the AMQP 1.0 intermediary.
default: nil
CollectdAmqpPort:
type: number
description: >
Service name or port number on which the AMQP 1.0 intermediary accepts
connections. This argument must be a string, even if the numeric form
is used.
default: 5666
CollectdAmqpUser:
type: string
description: >
User part of credentials used to authenticate to the AMQP 1.0 intermediary.
default: guest
CollectdAmqpPassword:
type: string
description: >
Password part of credentials used to authenticate to the AMQP 1.0 intermediary.
default: guest
hidden: true
CollectdAmqpTransportName:
type: string
description: Name of the AMQP 1.0 transport.
default: metrics
CollectdAmqpAddress:
type: string
description: >
This option specifies the prefix for the send-to value in the message.
default: collectd
CollectdAmqpInstances:
type: json
description: >
Hash of hashes. Each inner hash represent Instance block in plugin
configuration file. Key of outter hash represents instance name.
The 'address' value concatenated with the 'name' given will be used
as the send-to address for communications over the messaging link.
default: {}
CollectdAmqpRetryDelay:
type: number
description: >
When the AMQP 1.0 connection is lost, defines the time in seconds to wait
before attempting to reconnect.
default: 1
CollectdAmqpInterval:
type: number
description: >
Interval on which metrics should be sent to AMQP intermediary. If not set
the default for all collectd plugins is used.
default: -666
CollectdEnableSensubility:
type: boolean
description: Set to true if sensubility should be executed by exec plugin.
default: false
CollectdSensubilityExecSudoRule:
type: string
description: >
Given rule will be created in /etc/sudoers.d for sensubility to enable it calling
restricted commands via sensubility executor.
default: ''
CollectdSensubilityLogLevel:
type: string
description: Use for override the default logging level (WARNING).
default: WARNING
CollectdSensubilityConnection:
type: string
description: URL to Sensu sever side
default: amqp://sensu:sensu@localhost:5672//sensu
CollectdSensubilityKeepaliveInterval:
type: number
description: Interval in seconds for sending keepalive messages to Sensu server side.
default: 20
CollectdSensubilityTmpDir:
type: string
description: Path to temporary directory which is used for creation of check scripts.
default: /var/tmp/collectd-sensubility-checks
CollectdSensubilityShellPath:
type: string
description: Path to shell used for executing check scripts.
default: /usr/bin/sh
CollectdSensubilityWorkerCount:
type: number
description: Number of goroutines spawned for executing check scripts.
default: 2
CollectdSensubilityChecks:
type: json
description: JSON formated definition of standalone checks to be scheduled on client side.
default: {}
CollectdEnableContainerHealthCheck:
type: boolean
description: >
Set to false if container health check should not be defined and attached
to CollectdEnableContainerHealthCheck.
default: true
CollectdContainerHealthCheckCommand:
type: string
default: |
output=""
while read line ; do
i=$(echo $line | awk '//{gsub(/:/, "", $0); print $5}')
log=$(echo $line | awk '{split($0,a,/:\s+Error:\s+/); print a[2]}')
log=${log:0:-1}
output+=" ; ${i}: ${log}"
done < <(egrep "^[a-zA-Z]{3}\s+[0-9]{2}\s+[0-9\:]{8}\s+.*\s+.*:\s+[Ee]rror\:" /var/log/collectd/healthchecks.log)
truncate -s0 /var/log/collectd/healthchecks.log
if [ ! -z "${output}" ]; then
echo ${output:3} && exit 2;
fi
CollectdContainerHealthCheckInterval:
type: number
description: The frequency in seconds the docker health check is executed.
default: 10
CollectdContainerHealthCheckHandlers:
default: []
description: The Sensu event handler to use for events created by the docker health check.
type: comma_delimited_list
CollectdContainerHealthCheckOccurrences:
type: number
description: The number of event occurrences before sensu-plugin-aware handler should take action.
default: 3
CollectdContainerHealthCheckRefresh:
type: number
description: The number of seconds sensu-plugin-aware handlers should wait before taking second action.
default: 90
EnableSTF:
type: boolean
description: Set to true to enable configuration for STF client.
default: false
CollectdEnableMcelog:
type: boolean
description: Set to true to enable mcelog
default: false
conditions:
amqp_connection:
equals: [{get_param: CollectdConnectionType}, 'amqp1']
amqp_default_connection:
equals: [{get_param: CollectdAmqpHost}, nil]
amqp_default_interval:
equals: [{get_param: CollectdAmqpInterval}, -666]
collectd_connection:
equals: [{get_param: CollectdConnectionType}, 'network']
gnocchi_connection:
equals: [{get_param: CollectdConnectionType}, 'gnocchi']
gnocchi_auth_basic:
equals: [{get_param: CollectdGnocchiAuthMode}, 'basic']
gnocchi_default_connection:
and:
- equals: [{get_param: CollectdGnocchiServer}, nil]
- equals: [{get_param: CollectdGnocchiKeystoneEndpoint}, nil]
enable_sensubility:
equals: [{get_param: CollectdEnableSensubility}, true]
enable_stf:
equals: [{get_param: EnableSTF}, true]
sensubility_needs_sudo:
not:
equals: [{get_param: CollectdSensubilityExecSudoRule}, '']
resources:
ContainersCommon:
type: ../containers-common.yaml
outputs:
role_data:
description: Role data for the collectd role.
value:
service_name: collectd
config_settings:
map_merge:
- tripleo::profile::base::metrics::collectd::enable_file_logging: true
collectd::plugin::logfile::log_file: /var/log/collectd/collectd.log
- collectd::manage_repo: false
collectd::purge: true
collectd::recurse: true
collectd::purge_config: true
collectd::minimum_version: "5.7"
collectd::interval: {get_param: CollectdDefaultPollingInterval}
collectd::plugin::unixsock::socketgroup: root
collectd::plugin::unixsock::socketfile: /var/run/collectd-socket
collectd::plugin::unixsock::deletesocket: true
collectd::plugin::cpu::reportbycpu: true
collectd::plugin::cpu::reportbystate: true
collectd::plugin::cpu::reportnumcpu: false
collectd::plugin::cpu::valuespercentage: true
collectd::plugin::df::ignoreselected: true
collectd::plugin::df::reportbydevice: true
collectd::plugin::df::fstypes: ['xfs']
collectd::plugin::load::reportrelative: true
collectd::plugin::virt::connection: "qemu:///system"
collectd::plugin::virt::extra_stats: "cpu_util disk disk_err pcpu job_stats_background perf vcpupin"
collectd::plugin::virt::hostname_format: "hostname"
tripleo.collectd.plugins.collectd:
yaql:
data:
default_plugins: {get_param: CollectdDefaultPlugins}
stf_plugins:
if:
- enable_stf
- - cpu
- df
- load
- connectivity
- intel_rdt
- ipmi
- procevent
- []
extra_plugins: {get_param: CollectdExtraPlugins}
expression: >
($.data.default_plugins + $.data.stf_plugins + $.data.extra_plugins)
.flatten().distinct()
- if: # Collectd connected to QDR
- amqp_connection
- map_merge:
- tripleo::profile::base::metrics::collectd::amqp_transport_name:
get_param: CollectdAmqpTransportName
tripleo::profile::base::metrics::collectd::amqp_address:
get_param: CollectdAmqpAddress
tripleo::profile::base::metrics::collectd::amqp_instances:
get_param: CollectdAmqpInstances
tripleo::profile::base::metrics::collectd::amqp_retry_delay:
get_param: CollectdAmqpRetryDelay
- if:
- amqp_default_interval
- {}
- tripleo::profile::base::metrics::collectd::amqp_interval:
get_param: CollectdAmqpInterval
- if:
- amqp_default_connection
- tripleo::profile::base::metrics::collectd::amqp_host:
str_replace:
template:
"%{hiera('$NETWORK')}"
params:
$NETWORK:
get_param:
- ServiceNetMap
- str_replace:
template: "ROLENAMEMetricsQdrNetwork"
params:
ROLENAME: {get_param: RoleName}
tripleo::profile::base::metrics::collectd::amqp_port:
get_param: MetricsQdrPort
tripleo::profile::base::metrics::collectd::amqp_user:
get_param: MetricsQdrUsername
tripleo::profile::base::metrics::collectd::amqp_password:
get_param: MetricsQdrPassword
- tripleo::profile::base::metrics::collectd::amqp_host:
get_param: CollectdAmqpHost
tripleo::profile::base::metrics::collectd::amqp_port:
get_param: CollectdAmqpPort
tripleo::profile::base::metrics::collectd::amqp_user:
get_param: CollectdAmqpUser
tripleo::profile::base::metrics::collectd::amqp_password:
get_param: CollectdAmqpPassword
- {}
- if: # Collectd connected to external collectd instance
- collectd_connection
- tripleo::profile::base::metrics::collectd::collectd_server:
get_param: CollectdServer
tripleo::profile::base::metrics::collectd::collectd_port:
get_param: CollectdServerPort
tripleo::profile::base::metrics::collectd::collectd_username:
get_param: CollectdUsername
tripleo::profile::base::metrics::collectd::collectd_password:
get_param: CollectdPassword
tripleo::profile::base::metrics::collectd::collectd_securitylevel:
get_param: CollectdSecurityLevel
- {}
- if: # Collectd connected directly to Gnocchi DB
- gnocchi_connection
- map_merge:
- tripleo::profile::base::metrics::collectd::gnocchi_auth_mode:
get_param: CollectdGnocchiAuthMode
tripleo::profile::base::metrics::collectd::gnocchi_resource_type:
get_param: CollectdGnocchiResourceType
tripleo::profile::base::metrics::collectd::gnocchi_batch_size:
get_param: CollectdGnocchiBatchSize
- if:
- gnocchi_default_connection # use overcloud gnocchi instance or user provided instance?
- tripleo::profile::base::metrics::collectd::gnocchi_keystone_endpoint:
get_param: [EndpointMap, GnocchiInternal, uri]
tripleo::profile::base::metrics::collectd::gnocchi_keystone_auth_url:
get_param: [EndpointMap, KeystoneInternal, uri_no_suffix]
tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_name: 'gnocchi'
tripleo::profile::base::metrics::collectd::gnocchi_keystone_password:
get_param: GnocchiPassword
tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_name: 'service'
tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_domain_name: 'Default'
tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_domain_name: 'Default'
tripleo::profile::base::metrics::collectd::gnocchi_keystone_region_name:
get_param: KeystoneRegion
- if:
- gnocchi_auth_basic # use basic auth or keystone auth?
- tripleo::profile::base::metrics::collectd::gnocchi_protocol:
get_param: CollectdGnocchiProtocol
tripleo::profile::base::metrics::collectd::gnocchi_server:
get_param: CollectdGnocchiServer
tripleo::profile::base::metrics::collectd::gnocchi_port:
get_param: CollectdGnocchiPort
tripleo::profile::base::metrics::collectd::gnocchi_user:
get_param: CollectdGnocchiUser
- tripleo::profile::base::metrics::collectd::gnocchi_keystone_auth_url:
get_param: CollectdGnocchiKeystoneAuthUrl
tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_name:
get_param: CollectdGnocchiKeystoneUserName
tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_id:
get_param: CollectdGnocchiKeystoneUserId
tripleo::profile::base::metrics::collectd::gnocchi_keystone_password:
get_param: CollectdGnocchiKeystonePassword
tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_id:
get_param: CollectdGnocchiKeystoneProjectId
tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_name:
get_param: CollectdGnocchiKeystoneProjectName
tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_domain_id:
get_param: CollectdGnocchiKeystoneUserDomainId
tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_domain_name:
get_param: CollectdGnocchiKeystoneUserDomainName
tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_domain_id:
get_param: CollectdGnocchiKeystoneProjectDomainId
tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_domain_name:
get_param: CollectdGnocchiKeystoneProjectDomainName
tripleo::profile::base::metrics::collectd::gnocchi_keystone_region_name:
get_param: CollectdGnocchiKeystoneRegionName
tripleo::profile::base::metrics::collectd::gnocchi_keystone_interface:
get_param: CollectdGnocchiKeystoneInterface
tripleo::profile::base::metrics::collectd::gnocchi_keystone_endpoint:
get_param: CollectdGnocchiKeystoneEndpoint
- {}
- if: # Collectd should run collectd-sensubility via collectd-exec
- enable_sensubility
- map_merge:
- if:
- sensubility_needs_sudo
- tripleo::profile::base::metrics::collectd::sensubility::exec_sudo_rule:
get_param: CollectdSensubilityExecSudoRule
- {}
- tripleo::profile::base::metrics::collectd::enable_sensubility:
get_param: CollectdEnableSensubility
tripleo::profile::base::metrics::collectd::sensubility::connection:
get_param: CollectdSensubilityConnection
tripleo::profile::base::metrics::collectd::sensubility::log_level:
get_param: CollectdSensubilityLogLevel
tripleo::profile::base::metrics::collectd::sensubility::client_name:
str_replace:
template: "%{hiera('fqdn_NETWORK')}"
params:
NETWORK:
get_param:
- ServiceNetMap
- str_replace:
template: "ROLENAMEMetricsQdrNetwork"
params:
ROLENAME: {get_param: RoleName}
tripleo::profile::base::metrics::collectd::sensubility::client_address:
str_replace:
template: "%{hiera('NETWORK')}"
params:
NETWORK:
get_param:
- ServiceNetMap
- str_replace:
template: "ROLENAMEMetricsQdrNetwork"
params:
ROLENAME: {get_param: RoleName}
tripleo::profile::base::metrics::collectd::sensubility::keepalive_interval:
get_param: CollectdSensubilityKeepaliveInterval
tripleo::profile::base::metrics::collectd::sensubility::tmp_base_dir:
get_param: CollectdSensubilityTmpDir
tripleo::profile::base::metrics::collectd::sensubility::shell_path:
get_param: CollectdSensubilityShellPath
tripleo::profile::base::metrics::collectd::sensubility::worker_count:
get_param: CollectdSensubilityWorkerCount
tripleo::profile::base::metrics::collectd::sensubility::checks:
map_merge:
- {get_param: CollectdSensubilityChecks}
- check-container-health:
standalone: true
command: {get_param: CollectdContainerHealthCheckCommand}
interval: {get_param: CollectdContainerHealthCheckInterval}
handlers: {get_param: CollectdContainerHealthCheckHandlers}
occurrences: {get_param: CollectdContainerHealthCheckOccurrences}
refresh: {get_param: CollectdContainerHealthCheckRefresh}
- {}
service_config_settings: {}
# BEGIN DOCKER SETTINGS
puppet_config:
config_volume: collectd
puppet_tags: collectd_client_config,exec
step_config: include ::tripleo::profile::base::metrics::collectd
config_image: {get_param: ContainerCollectdConfigImage}
kolla_config:
/var/lib/kolla/config_files/collectd.json:
command: /usr/sbin/collectd -f
config_files:
- source: "/var/lib/kolla/config_files/src/*"
dest: "/"
merge: true
preserve_properties: true
- source: "/var/lib/kolla/config_files/src/etc/collectd.d"
dest: "/etc/"
merge: false
preserve_properties: true
permissions:
- path: /var/log/collectd
owner: collectd:collectd
recurse: true
docker_config:
step_5:
collectd:
image: {get_param: ContainerCollectdImage}
net: host
pid: host
user: root
restart: always
cap_add:
- IPC_LOCK
healthcheck:
test: /openstack/healthcheck
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
-
- /var/lib/kolla/config_files/collectd.json:/var/lib/kolla/config_files/config.json:ro
- /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
- /var/log/containers/collectd:/var/log/collectd:rw,z
- /var/run/:/var/run:rw
- /sys/fs/cgroup:/sys/fs/cgroup:ro
environment:
KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
deploy_steps_tasks:
- name: set enable_sensubility fact
set_fact:
enable_sensubility: {get_param: CollectdEnableSensubility}
- name: Configure rsyslog for container healthchecks
when:
- step|int == 1
block:
- name: Check if rsyslog exists
shell: systemctl list-unit-files --type=service | grep -q rsyslog
register: rsyslog_config
failed_when: rsyslog_config.rc == 2
- name: Configure if we can
when:
- rsyslog_config is changed
- rsyslog_config.rc == 0
block:
- name: Log healthchecks in dedicated file
when:
- enable_sensubility|bool
register: logconfig_add
copy:
dest: /etc/rsyslog.d/openstack-healthcheck.conf
content: |
if $programname startswith 'healthcheck_' then -/var/log/containers/collectd/healthchecks.log
& stop
- name: Remove healthcheck log
when:
- not enable_sensubility|bool
register: logconfig_rm
file:
path: /etc/rsyslog.d/openstack-healthcheck.conf
state: absent
- name: Reload rsyslogd if needed
when: logconfig_add is changed or logconfig_rm is changed
service:
name: rsyslog
state: restarted
host_prep_tasks:
- name: create persistent directories
file:
path: "{{ item.path }}"
state: directory
setype: "{{ item.setype }}"
mode: "{{ item.mode }}"
with_items:
- { 'path': /var/log/containers/collectd, 'setype': svirt_sandbox_file_t, 'mode': '0750' }
- name: import provision_mcelog
import_role:
name: tripleo_provision_mcelog
when: {get_param: CollectdEnableMcelog}