heat_template_version: rocky

# Heat (HOT) service template for the containerized collectd service.
#
# The template produces a single output, `role_data`, which carries:
#   * config_settings      - hiera data for the collectd puppet profile
#   * puppet_config        - how the configuration volume is generated
#   * kolla_config         - how config files are copied into the container
#   * docker_config        - the container definition started at step 5
#   * deploy_steps_tasks / host_prep_tasks - Ansible tasks run on the host
#
# Which write plugin gets configured is selected by CollectdConnectionType
# ('amqp1', 'network' or 'gnocchi'); see the `conditions` section.

description: >
  Containerized collectd service

parameters:
  ContainerCollectdImage:
    description: image
    type: string
  ContainerCollectdConfigImage:
    description: The container image to use for the collectd config_volume
    type: string
  EndpointMap:
    default: {}
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
    type: json
  ServiceData:
    default: {}
    description: Dictionary packing service data
    type: json
  ServiceNetMap:
    default: {}
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry. This
      mapping overrides those in ServiceNetMapDefaults.
    type: json
  RoleName:
    default: ''
    description: Role name on which the service is applied
    type: string
  RoleParameters:
    default: {}
    description: Parameters specific to the role
    type: json
  GnocchiPassword:
    type: string
    description: The password for the gnocchi service and db account.
    hidden: true
  KeystoneRegion:
    type: string
    description: Keystone region for endpoint
    default: 'regionOne'
  MetricsQdrPort:
    default: 5666
    description: >-
      Service name or port number on which the qdrouterd will accept
      connections.
    type: number
  MetricsQdrUsername:
    default: 'guest'
    description: >-
      Username which should be used to authenticate to the deployed
      qdrouterd.
    type: string
  MetricsQdrPassword:
    default: 'guest'
    description: >-
      Password which should be used to authenticate to the deployed
      qdrouterd.
    type: string
    hidden: true
  # NOTE(review): MonitoringSubscriptionCollectd is not referenced anywhere
  # in this template; presumably kept for backward compatibility - confirm.
  MonitoringSubscriptionCollectd:
    default: 'overcloud-collectd'
    type: string
  CollectdConnectionType:
    default: 'amqp1'
    description: >-
      Define which write plugin should collectd use. Currently supported are
      'amqp1', 'network' and 'gnocchi'.
    type: string
  CollectdDefaultPollingInterval:
    default: 120
    type: number
    description: >
      Controls how often registered read functions are called and with that
      the resolution of the collected data. This value can be overridden per
      plugin(per role) by setting "::collectd::plugin::<plugin_name>::interval"
      key in ExtraConfig(<role_name>ExtraConfig).
  CollectdDefaultPlugins:
    default:
      - cpu
      - df
      - disk
      - hugepages
      - interface
      - load
      - memory
      - processes
      - unixsock
      - uptime
    type: comma_delimited_list
    description: >
      List of collectd plugins to activate on all overcloud hosts. See
      the documentation for the puppet-collectd module for a list of plugins
      supported by the module (https://github.com/voxpupuli/puppet-collectd).
      Set this key to override the default list of plugins. Use
      CollectdExtraPlugins if you want to load additional plugins without
      overriding the defaults.
  CollectdExtraPlugins:
    default: []
    type: comma_delimited_list
    description: >
      List of collectd plugins to activate on all overcloud hosts. See
      the documentation for the puppet-collectd module for a list of plugins
      supported by the module (https://github.com/voxpupuli/puppet-collectd).
      Set this key to load plugins in addition to those in
      CollectdDefaultPlugins.
  CollectdServer:
    type: string
    description: >
      Address of remote collectd server to which we will send
      metrics.
    default: ''
  CollectdServerPort:
    type: number
    default: 25826
    description: >
      Port on remote collectd server to which we will send
      metrics.
  CollectdUsername:
    type: string
    description: >
      Username for authenticating to the remote collectd server. The default
      is to not configure any authentication.
    default: ''
  CollectdPassword:
    type: string
    hidden: true
    description: >
      Password for authenticating to the remote collectd server. The
      default is to not configure any authentication.
    default: ''
  CollectdSecurityLevel:
    type: string
    description: >
      Security level setting for remote collectd connection. If it is
      set to Sign or Encrypt the CollectdPassword and CollectdUsername
      parameters need to be set.
    default: 'None'
    constraints:
      # 'None' is the literal string, not a YAML null.
      - allowed_values:
          - 'None'
          - Sign
          - Encrypt
  CollectdGnocchiAuthMode:
    type: string
    description: >
      Type of authentication Gnocchi server is using. Supported values are
      'basic' and 'keystone'.
    default: 'keystone'
  CollectdGnocchiProtocol:
    type: string
    description: API protocol Gnocchi server is using.
    default: 'http'
  # The string 'nil' is used below as the "not set" sentinel; the
  # gnocchi_default_connection / amqp_default_connection conditions compare
  # against it.
  CollectdGnocchiServer:
    type: string
    description: >
      The name or address of a gnocchi endpoint to which we should
      send metrics.
    default: nil
  CollectdGnocchiPort:
    type: number
    description: The port to which we will connect on the Gnocchi server.
    default: 8041
  CollectdGnocchiUser:
    type: string
    description: >
      Username for authenticating to the remote Gnocchi server using simple
      authentication.
    default: nil
  CollectdGnocchiKeystoneAuthUrl:
    type: string
    description: Keystone endpoint URL to authenticate to.
    default: nil
  CollectdGnocchiKeystoneUserName:
    type: string
    description: Username for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneUserId:
    type: string
    description: User ID for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystonePassword:
    type: string
    description: Password for authenticating to Keystone
    default: nil
    hidden: true
  CollectdGnocchiKeystoneProjectId:
    type: string
    description: Project ID for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneProjectName:
    type: string
    description: Project name for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneUserDomainId:
    type: string
    description: User domain ID for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneUserDomainName:
    type: string
    description: User domain name for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneProjectDomainId:
    type: string
    description: Project domain ID for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneProjectDomainName:
    type: string
    description: Project domain name for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneRegionName:
    type: string
    description: Region name for authenticating to Keystone.
    default: nil
  CollectdGnocchiKeystoneInterface:
    type: string
    description: Type of Keystone endpoint to authenticate to.
    default: nil
  CollectdGnocchiKeystoneEndpoint:
    type: string
    description: >
      Explicitly state Gnocchi server URL if you want to override
      Keystone value
    default: nil
  CollectdGnocchiResourceType:
    type: string
    description: >
      Default resource type created by the collectd-gnocchi plugin in Gnocchi
      to store hosts.
    default: 'collectd'
  CollectdGnocchiBatchSize:
    type: number
    description: Minimum number of values Gnocchi should batch.
    default: 10
  EnableSQLAlchemyCollectd:
    type: boolean
    description: >
      Set to true to enable the SQLAlchemy-collectd server plugin
    default: false
  CollectdSQLAlchemyLogMessages:
    type: string
    description: set to "debug" to enable message logging.
    default: 'info'
  CollectdSQLAlchemyBindHost:
    type: string
    description: >
      hostname for SQLAlchemy-collectd plugin to bind on. defaults to
      localhost.
    default: 'localhost'
  CollectdAmqpHost:
    type: string
    description: Hostname or IP address of the AMQP 1.0 intermediary.
    default: nil
  CollectdAmqpPort:
    type: number
    description: >
      Service name or port number on which the AMQP 1.0 intermediary accepts
      connections. This argument must be a string, even if the numeric form
      is used.
    default: 5666
  CollectdAmqpUser:
    type: string
    description: >
      User part of credentials used to authenticate to the AMQP 1.0
      intermediary.
    default: guest
  CollectdAmqpPassword:
    type: string
    description: >
      Password part of credentials used to authenticate to the AMQP 1.0
      intermediary.
    default: guest
    hidden: true
  CollectdAmqpTransportName:
    type: string
    description: Name of the AMQP 1.0 transport.
    default: metrics
  CollectdAmqpAddress:
    type: string
    description: >
      This option specifies the prefix for the send-to value in the message.
    default: collectd
  CollectdAmqpInstances:
    type: json
    description: >
      Hash of hashes. Each inner hash represent Instance block in plugin
      configuration file. Key of outer hash represents instance name.
      The 'address' value concatenated with the 'name' given will be used
      as the send-to address for communications over the messaging link.
    default: {}
  CollectdAmqpRetryDelay:
    type: number
    description: >
      When the AMQP 1.0 connection is lost, defines the time in seconds to
      wait before attempting to reconnect.
    default: 1
  # -666 is the "not set" sentinel checked by the amqp_default_interval
  # condition; when it matches, no interval override is emitted.
  CollectdAmqpInterval:
    type: number
    description: >
      Interval on which metrics should be sent to AMQP intermediary. If not
      set the default for all collectd plugins is used.
    default: -666
  # -1 is the sentinel checked by the amqp_default_send_queue_limit
  # condition; when it matches, no queue limit is emitted.
  CollectdAmqpSendQueueLimit:
    type: number
    description: >
      Number of data sets to be kept in memory, older sets will be discarded,
      if set to -1, this feature is disabled.
    default: -1
  CollectdEnableSensubility:
    type: boolean
    description: Set to true if sensubility should be executed by exec plugin.
    default: false
  CollectdSensubilityExecSudoRule:
    type: string
    description: >
      Given rule will be created in /etc/sudoers.d for sensubility to enable
      it calling restricted commands via sensubility executor.
    default: ''
  CollectdSensubilityLogLevel:
    type: string
    description: Use for override the default logging level (WARNING).
    default: WARNING
  CollectdSensubilityConnection:
    type: string
    description: URL to Sensu server side
    default: 'amqp://sensu:sensu@localhost:5672//sensu'
  CollectdSensubilityKeepaliveInterval:
    type: number
    description: >-
      Interval in seconds for sending keepalive messages to Sensu server
      side.
    default: 20
  CollectdSensubilityTmpDir:
    type: string
    description: >-
      Path to temporary directory which is used for creation of check
      scripts.
    default: /var/tmp/collectd-sensubility-checks
  CollectdSensubilityShellPath:
    type: string
    description: Path to shell used for executing check scripts.
    default: /usr/bin/sh
  CollectdSensubilityWorkerCount:
    type: number
    description: Number of goroutines spawned for executing check scripts.
    default: 2
  CollectdSensubilityChecks:
    type: json
    description: >-
      JSON formatted definition of standalone checks to be scheduled on
      client side.
    default: {}
  CollectdSensubilityTransport:
    type: string
    description: >-
      Bus type for sent data. Options are 'sensu' (rabbitmq) and 'amqp1'
    default: sensu
  CollectdSensubilityResultsChannel:
    type: string
    description: AMQP1 channel address
    default: collectd/notify
  # NOTE(review): CollectdEnableContainerHealthCheck is not referenced in
  # this template; the check-container-health check below is always merged
  # into the sensubility checks - confirm whether that is intended.
  CollectdEnableContainerHealthCheck:
    type: boolean
    description: >
      Set to false if container health check should not be defined
      and attached to CollectdEnableContainerHealthCheck.
    default: true
  CollectdContainerHealthCheckCommand:
    type: string
    description: >-
      Command executed by the check-container-health sensubility check.
    default: /scripts/collectd_check_health.py
  CollectdContainerHealthCheckInterval:
    type: number
    description: The frequency in seconds the docker health check is executed.
    default: 10
  CollectdContainerHealthCheckHandlers:
    default: []
    description: >-
      The Sensu event handler to use for events created by the docker health
      check.
    type: comma_delimited_list
  CollectdContainerHealthCheckOccurrences:
    type: number
    description: >-
      The number of event occurrences before sensu-plugin-aware handler
      should take action.
    default: 3
  CollectdContainerHealthCheckRefresh:
    type: number
    description: >-
      The number of seconds sensu-plugin-aware handlers should wait before
      taking second action.
    default: 90
  EnableSTF:
    type: boolean
    description: Set to true to enable configuration for STF client.
    default: false
  CollectdEnableMcelog:
    type: boolean
    description: Set to true to enable mcelog
    default: false
  CollectdEnableLibpodstats:
    type: boolean
    description: Set to true if collectd should run the libpodstats plugin
    default: false

conditions:
  # --- write-plugin selection (CollectdConnectionType) ---
  amqp_connection:
    equals: [{get_param: CollectdConnectionType}, 'amqp1']
  collectd_connection:
    equals: [{get_param: CollectdConnectionType}, 'network']
  gnocchi_connection:
    equals: [{get_param: CollectdConnectionType}, 'gnocchi']

  # --- "parameter left at its sentinel default" checks ---
  amqp_default_connection:
    equals: [{get_param: CollectdAmqpHost}, nil]
  amqp_default_interval:
    equals: [{get_param: CollectdAmqpInterval}, -666]
  amqp_default_send_queue_limit:
    equals: [{get_param: CollectdAmqpSendQueueLimit}, -1]
  gnocchi_default_connection:
    and:
      - equals: [{get_param: CollectdGnocchiServer}, nil]
      - equals: [{get_param: CollectdGnocchiKeystoneEndpoint}, nil]

  # --- feature toggles ---
  gnocchi_auth_basic:
    equals: [{get_param: CollectdGnocchiAuthMode}, 'basic']
  enable_sensubility:
    equals: [{get_param: CollectdEnableSensubility}, true]
  enable_stf:
    equals: [{get_param: EnableSTF}, true]
  enable_sqlalchemy_collectd:
    equals: [{get_param: EnableSQLAlchemyCollectd}, true]
  sensubility_needs_sudo:
    not:
      equals: [{get_param: CollectdSensubilityExecSudoRule}, '']
  enable_libpodstats:
    equals: [{get_param: CollectdEnableLibpodstats}, true]

resources:
  # Supplies the shared `volumes` and `container_config_scripts` attributes
  # consumed below via get_attr.
  ContainersCommon:
    type: ../containers-common.yaml

outputs:
  role_data:
    description: Role data for the collectd role.
    value:
      service_name: collectd
      # Hiera data: a static base map merged with one optional map per
      # enabled feature; every disabled feature contributes `{}`.
      config_settings:
        map_merge:
          - tripleo::profile::base::metrics::collectd::enable_file_logging: true
            collectd::plugin::logfile::log_file: /var/log/collectd/collectd.log
          - collectd::manage_repo: false
            collectd::purge: true
            collectd::recurse: true
            collectd::purge_config: true
            collectd::minimum_version: "5.7"
            collectd::interval: {get_param: CollectdDefaultPollingInterval}
            collectd::plugin::unixsock::socketgroup: root
            collectd::plugin::unixsock::socketfile: /run/collectd-socket
            collectd::plugin::unixsock::deletesocket: true
            collectd::plugin::cpu::reportbycpu: true
            collectd::plugin::cpu::reportbystate: true
            collectd::plugin::cpu::reportnumcpu: false
            collectd::plugin::cpu::valuespercentage: true
            collectd::plugin::df::ignoreselected: true
            collectd::plugin::df::reportbydevice: true
            collectd::plugin::df::fstypes: ['xfs']
            collectd::plugin::load::reportrelative: true
            collectd::plugin::virt::connection: "qemu:///system"
            collectd::plugin::virt::extra_stats:
              list_join:
                - ' '
                - - 'pcpu'
                  - 'cpu_util'
                  - 'vcpupin'
                  - 'vcpu'
                  - 'memory'
                  - 'disk'
                  - 'disk_err'
                  - 'disk_allocation'
                  - 'disk_capacity'
                  - 'disk_physical'
                  - 'domain_state'
                  - 'job_stats_background'
                  - 'perf'
            collectd::plugin::virt::hostname_format: "hostname"
            # Final plugin list: defaults + STF plugins (only when EnableSTF
            # is true) + extras, flattened and de-duplicated.
            tripleo.collectd.plugins.collectd:
              yaql:
                data:
                  default_plugins: {get_param: CollectdDefaultPlugins}
                  stf_plugins:
                    if:
                      - enable_stf
                      - - cpu
                        - df
                        - load
                        - connectivity
                        - intel_rdt
                        - ipmi
                        - procevent
                      - []
                  extra_plugins: {get_param: CollectdExtraPlugins}
                expression: >
                  ($.data.default_plugins + $.data.stf_plugins + $.data.extra_plugins)
                  .flatten().distinct()
          - if:
              # Collectd connected to QDR
              - amqp_connection
              - map_merge:
                  - tripleo::profile::base::metrics::collectd::amqp_transport_name:
                      get_param: CollectdAmqpTransportName
                    tripleo::profile::base::metrics::collectd::amqp_address:
                      get_param: CollectdAmqpAddress
                    tripleo::profile::base::metrics::collectd::amqp_instances:
                      get_param: CollectdAmqpInstances
                    tripleo::profile::base::metrics::collectd::amqp_retry_delay:
                      get_param: CollectdAmqpRetryDelay
                  # Only emit an interval override when one was supplied.
                  - if:
                      - amqp_default_interval
                      - {}
                      - tripleo::profile::base::metrics::collectd::amqp_interval:
                          get_param: CollectdAmqpInterval
                  # Only emit a queue limit when one was supplied. The key is
                  # `amqp_send_queue_limit` (mirroring `amqp_interval` above);
                  # it previously repeated the condition name
                  # `amqp_default_send_queue_limit`.
                  - if:
                      - amqp_default_send_queue_limit
                      - {}
                      - tripleo::profile::base::metrics::collectd::amqp_send_queue_limit:
                          get_param: CollectdAmqpSendQueueLimit
                  - if:
                      - amqp_default_connection
                      # No explicit AMQP host: use the MetricsQdr* parameters
                      # and resolve the host through hiera from the role's
                      # <RoleName>MetricsQdrNetwork entry in ServiceNetMap.
                      - tripleo::profile::base::metrics::collectd::amqp_host:
                          str_replace:
                            template: "%{hiera('$NETWORK')}"
                            params:
                              $NETWORK:
                                get_param:
                                  - ServiceNetMap
                                  - str_replace:
                                      template: "ROLENAMEMetricsQdrNetwork"
                                      params:
                                        ROLENAME: {get_param: RoleName}
                        tripleo::profile::base::metrics::collectd::amqp_port:
                          get_param: MetricsQdrPort
                        tripleo::profile::base::metrics::collectd::amqp_user:
                          get_param: MetricsQdrUsername
                        tripleo::profile::base::metrics::collectd::amqp_password:
                          get_param: MetricsQdrPassword
                      # Explicit AMQP host: use the CollectdAmqp* parameters.
                      - tripleo::profile::base::metrics::collectd::amqp_host:
                          get_param: CollectdAmqpHost
                        tripleo::profile::base::metrics::collectd::amqp_port:
                          get_param: CollectdAmqpPort
                        tripleo::profile::base::metrics::collectd::amqp_user:
                          get_param: CollectdAmqpUser
                        tripleo::profile::base::metrics::collectd::amqp_password:
                          get_param: CollectdAmqpPassword
              - {}
          - if:
              # Collectd connected to external collectd instance
              - collectd_connection
              - tripleo::profile::base::metrics::collectd::collectd_server:
                  get_param: CollectdServer
                tripleo::profile::base::metrics::collectd::collectd_port:
                  get_param: CollectdServerPort
                tripleo::profile::base::metrics::collectd::collectd_username:
                  get_param: CollectdUsername
                tripleo::profile::base::metrics::collectd::collectd_password:
                  get_param: CollectdPassword
                tripleo::profile::base::metrics::collectd::collectd_securitylevel:
                  get_param: CollectdSecurityLevel
              - {}
          - if:
              - enable_sqlalchemy_collectd
              - map_merge:
                  - tripleo::profile::base::metrics::collectd::enable_sqlalchemy_collectd: true
                  - tripleo::profile::base::metrics::collectd::sqlalchemy_collectd_bind_host:
                      get_param: CollectdSQLAlchemyBindHost
                  - tripleo::profile::base::metrics::collectd::sqlalchemy_collectd_log_messages:
                      get_param: CollectdSQLAlchemyLogMessages
              - {}
          - if:
              # Collectd connected directly to Gnocchi DB
              - gnocchi_connection
              - map_merge:
                  - tripleo::profile::base::metrics::collectd::gnocchi_auth_mode:
                      get_param: CollectdGnocchiAuthMode
                    tripleo::profile::base::metrics::collectd::gnocchi_resource_type:
                      get_param: CollectdGnocchiResourceType
                    tripleo::profile::base::metrics::collectd::gnocchi_batch_size:
                      get_param: CollectdGnocchiBatchSize
                  - if:
                      # use overcloud gnocchi instance or user provided instance?
                      - gnocchi_default_connection
                      - tripleo::profile::base::metrics::collectd::gnocchi_keystone_endpoint:
                          get_param: [EndpointMap, GnocchiInternal, uri]
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_auth_url:
                          get_param: [EndpointMap, KeystoneInternal, uri_no_suffix]
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_name: 'gnocchi'
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_password:
                          get_param: GnocchiPassword
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_name: 'service'
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_domain_name: 'Default'
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_domain_name: 'Default'
                        tripleo::profile::base::metrics::collectd::gnocchi_keystone_region_name:
                          get_param: KeystoneRegion
                      - if:
                          # use basic auth or keystone auth?
                          - gnocchi_auth_basic
                          - tripleo::profile::base::metrics::collectd::gnocchi_protocol:
                              get_param: CollectdGnocchiProtocol
                            tripleo::profile::base::metrics::collectd::gnocchi_server:
                              get_param: CollectdGnocchiServer
                            tripleo::profile::base::metrics::collectd::gnocchi_port:
                              get_param: CollectdGnocchiPort
                            tripleo::profile::base::metrics::collectd::gnocchi_user:
                              get_param: CollectdGnocchiUser
                          - tripleo::profile::base::metrics::collectd::gnocchi_keystone_auth_url:
                              get_param: CollectdGnocchiKeystoneAuthUrl
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_name:
                              get_param: CollectdGnocchiKeystoneUserName
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_id:
                              get_param: CollectdGnocchiKeystoneUserId
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_password:
                              get_param: CollectdGnocchiKeystonePassword
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_id:
                              get_param: CollectdGnocchiKeystoneProjectId
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_name:
                              get_param: CollectdGnocchiKeystoneProjectName
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_domain_id:
                              get_param: CollectdGnocchiKeystoneUserDomainId
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_user_domain_name:
                              get_param: CollectdGnocchiKeystoneUserDomainName
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_domain_id:
                              get_param: CollectdGnocchiKeystoneProjectDomainId
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_project_domain_name:
                              get_param: CollectdGnocchiKeystoneProjectDomainName
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_region_name:
                              get_param: CollectdGnocchiKeystoneRegionName
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_interface:
                              get_param: CollectdGnocchiKeystoneInterface
                            tripleo::profile::base::metrics::collectd::gnocchi_keystone_endpoint:
                              get_param: CollectdGnocchiKeystoneEndpoint
              - {}
          - if:
              # Collectd should run collectd-sensubility via collectd-exec
              - enable_sensubility
              - map_merge:
                  # The sudo rule is only emitted when a non-empty rule was
                  # supplied.
                  - if:
                      - sensubility_needs_sudo
                      - tripleo::profile::base::metrics::collectd::sensubility::exec_sudo_rule:
                          get_param: CollectdSensubilityExecSudoRule
                      - {}
                  - tripleo::profile::base::metrics::collectd::enable_sensubility:
                      get_param: CollectdEnableSensubility
                    tripleo::profile::base::metrics::collectd::sensubility::connection:
                      get_param: CollectdSensubilityConnection
                    tripleo::profile::base::metrics::collectd::sensubility::log_level:
                      get_param: CollectdSensubilityLogLevel
                    tripleo::profile::base::metrics::collectd::sensubility::client_name:
                      str_replace:
                        template: "%{hiera('fqdn_NETWORK')}"
                        params:
                          NETWORK:
                            get_param:
                              - ServiceNetMap
                              - str_replace:
                                  template: "ROLENAMEMetricsQdrNetwork"
                                  params:
                                    ROLENAME: {get_param: RoleName}
                    tripleo::profile::base::metrics::collectd::sensubility::client_address:
                      str_replace:
                        template: "%{hiera('NETWORK')}"
                        params:
                          NETWORK:
                            get_param:
                              - ServiceNetMap
                              - str_replace:
                                  template: "ROLENAMEMetricsQdrNetwork"
                                  params:
                                    ROLENAME: {get_param: RoleName}
                    tripleo::profile::base::metrics::collectd::sensubility::keepalive_interval:
                      get_param: CollectdSensubilityKeepaliveInterval
                    tripleo::profile::base::metrics::collectd::sensubility::tmp_base_dir:
                      get_param: CollectdSensubilityTmpDir
                    tripleo::profile::base::metrics::collectd::sensubility::shell_path:
                      get_param: CollectdSensubilityShellPath
                    tripleo::profile::base::metrics::collectd::sensubility::worker_count:
                      get_param: CollectdSensubilityWorkerCount
                    # User-supplied checks merged with the built-in container
                    # health check; the built-in entry is listed last, so it
                    # wins on a key clash.
                    tripleo::profile::base::metrics::collectd::sensubility::checks:
                      map_merge:
                        - {get_param: CollectdSensubilityChecks}
                        - check-container-health:
                            standalone: true
                            command: {get_param: CollectdContainerHealthCheckCommand}
                            interval: {get_param: CollectdContainerHealthCheckInterval}
                            handlers: {get_param: CollectdContainerHealthCheckHandlers}
                            occurrences: {get_param: CollectdContainerHealthCheckOccurrences}
                            refresh: {get_param: CollectdContainerHealthCheckRefresh}
                    tripleo::profile::base::metrics::collectd::sensubility::results_channel:
                      get_param: CollectdSensubilityResultsChannel
                    tripleo::profile::base::metrics::collectd::sensubility::transport:
                      get_param: CollectdSensubilityTransport
                    tripleo::profile::base::metrics::collectd::sensubility::amqp_port:
                      get_param: CollectdAmqpPort
              - {}
          - if:
              - enable_libpodstats
              - tripleo::profile::base::metrics::collectd::enable_libpodstats:
                  get_param: CollectdEnableLibpodstats
              - {}
      service_config_settings: {}
      # BEGIN DOCKER SETTINGS
      puppet_config:
        config_volume: collectd
        puppet_tags: collectd_client_config,exec
        step_config: include tripleo::profile::base::metrics::collectd
        config_image: {get_param: ContainerCollectdConfigImage}
      kolla_config:
        /var/lib/kolla/config_files/collectd.json:
          command: /usr/sbin/collectd -f
          config_files:
            - source: "/var/lib/kolla/config_files/src/*"
              dest: "/"
              merge: true
              preserve_properties: true
            # collectd.d is copied with merge disabled, unlike the generic
            # copy above.
            - source: "/var/lib/kolla/config_files/src/etc/collectd.d"
              dest: "/etc/"
              merge: false
              preserve_properties: true
          permissions:
            - path: /var/log/collectd
              owner: collectd:collectd
              recurse: true
      container_config_scripts:
        map_merge:
          - {get_attr: [ContainersCommon, container_config_scripts]}
          - collectd_check_health.py:
              mode: "0755"
              content:
                get_file: ../../container_config_scripts/monitoring/collectd_check_health.py
      docker_config:
        step_5:
          collectd:
            image: {get_param: ContainerCollectdImage}
            net: host
            pid: host
            user: root
            restart: always
            cap_add:
              - IPC_LOCK
            healthcheck:
              test: /openstack/healthcheck
            volumes:
              list_concat:
                - {get_attr: [ContainersCommon, volumes]}
                - - /var/lib/kolla/config_files/collectd.json:/var/lib/kolla/config_files/config.json:ro
                  - /var/lib/containers/storage/overlay-containers:/var/lib/containers/storage/overlay-containers:ro
                  - /var/lib/config-data/puppet-generated/collectd:/var/lib/kolla/config_files/src:ro
                  - /var/log/containers/collectd:/var/log/collectd:rw,z
                  - /var/lib/container-config-scripts:/scripts:ro
                  - /run/:/run:rw
                  - /sys/fs/cgroup:/sys/fs/cgroup:ro
            environment:
              KOLLA_CONFIG_STRATEGY: COPY_ALWAYS
      deploy_steps_tasks:
        # Expose the Heat parameter to the Ansible tasks below.
        - name: set enable_sensubility fact
          set_fact:
            enable_sensubility: {get_param: CollectdEnableSensubility}
        - name: Configure rsyslog for container healthchecks
          when:
            - step|int == 1
          block:
            - name: Check if rsyslog exists
              shell: systemctl list-unit-files --type=service | grep -q rsyslog
              register: rsyslog_config
              # The task fails only on rc 2; rc 1 is tolerated and simply
              # skips the configuration block below (which requires rc 0).
              failed_when: rsyslog_config.rc == 2
            - name: Configure if we can
              when:
                - rsyslog_config is changed
                - rsyslog_config.rc == 0
              block:
                - name: Log healthchecks in dedicated file
                  when:
                    - enable_sensubility|bool
                  register: logconfig_add
                  copy:
                    dest: /etc/rsyslog.d/openstack-healthcheck.conf
                    content: |
                      if ($programname startswith 'podman' and ($msg contains 'container exec' or $msg contains 'healthy')) or ($programname startswith 'systemd' and $msg contains 'podman healthcheck run') then -/var/log/containers/collectd/healthchecks.stdout
                      & stop
                - name: Remove healthcheck log
                  when:
                    - not enable_sensubility|bool
                  register: logconfig_rm
                  file:
                    path: /etc/rsyslog.d/openstack-healthcheck.conf
                    state: absent
                - name: Reload rsyslogd if needed
                  when: logconfig_add is changed or logconfig_rm is changed
                  service:
                    name: rsyslog
                    state: restarted
      host_prep_tasks:
        - name: create persistent directories
          file:
            path: "{{ item.path }}"
            state: directory
            setype: "{{ item.setype }}"
            mode: "{{ item.mode }}"
          with_items:
            - { 'path': /var/log/containers/collectd, 'setype': container_file_t, 'mode': '0750' }
        - name: import provision_mcelog
          include_role:
            name: tripleo_provision_mcelog
          when: {get_param: CollectdEnableMcelog}