From 9019ad3529790fcf02216d7439e343e3f0adff7d Mon Sep 17 00:00:00 2001 From: Nikolay Vinogradov Date: Wed, 13 Nov 2019 13:58:19 +0300 Subject: [PATCH] Added Filebeat, NRPE and Telegraf to Microstack - Snapped binary packages of Filebeat, NRPE and Telegraf (disabled by default) - Added a workaround for the Telegraf segfault that occurs after ELF patching by snapcraft - Implemented IPMI input tuning for Telegraf - Allowed NRPE to run as root:root (using a package from a custom PPA) - Implemented Filebeat, NRPE and Telegraf control scripts and config on top of snap-overlay - Added support for checking Microstack systemd services via NRPE - Added a few generic and Microstack-specific NRPE checks - Added the possibility to override default config paths for the daemons - Added support for in-band IPMI input to Telegraf - Made LMA wrapper and service naming follow Microstack conventions - Increased the build timeout in .zuul.yaml by 30 minutes Change-Id: I68dbdb11248cf0c1e22e9333af3cf0f88954f557 --- .zuul.yaml | 2 +- README.md | 4 + checks/check_systemd.py | 38 +++++++ filebeat.pgp.key | 32 ++++++ snap-overlay/bin/set-default-config | 30 ++++++ snap-overlay/snap-openstack.yaml | 30 ++++++ snap-overlay/templates/filebeat.yaml.j2 | 43 ++++++++ snap-overlay/templates/nrpe.cfg.j2 | 56 ++++++++++ snap-overlay/templates/telegraf.conf.j2 | 131 ++++++++++++++++++++++++ snap-wrappers/filebeat/filebeat | 11 ++ snap-wrappers/nrpe/nrpe | 6 ++ snap-wrappers/telegraf/telegraf | 8 ++ snapcraft.yaml | 72 +++++++++++++ telegraf.pgp.key | 28 +++++ tools/init/init/main.py | 1 + tools/init/init/questions/__init__.py | 77 ++++++++++++++ 16 files changed, 568 insertions(+), 1 deletion(-) create mode 100644 checks/check_systemd.py create mode 100644 filebeat.pgp.key create mode 100644 snap-overlay/templates/filebeat.yaml.j2 create mode 100644 snap-overlay/templates/nrpe.cfg.j2 create mode 100644 snap-overlay/templates/telegraf.conf.j2 create mode 100755 snap-wrappers/filebeat/filebeat create mode 100755 snap-wrappers/nrpe/nrpe create mode 100755 
snap-wrappers/telegraf/telegraf create mode 100644 telegraf.pgp.key diff --git a/.zuul.yaml b/.zuul.yaml index 9a49852..e79b21a 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -1,7 +1,7 @@ - job: name: microstack-tox-snap-with-sudo parent: openstack-tox-snap-with-sudo - timeout: 5400 + timeout: 7200 nodeset: ubuntu-bionic vars: tox_envlist: snap diff --git a/README.md b/README.md index 3331c13..b28f3ec 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,10 @@ for example, run: sudo microstack.remove --auto --purge +## LMA stack + +Filebeat (logging), Telegraf (monitoring) and NRPE (alerting) are bundled as snap systemd services and are disabled by default. + ## Customising and contributing To customise services and settings, look in the `.d` directories under diff --git a/checks/check_systemd.py b/checks/check_systemd.py new file mode 100644 index 0000000..6c6f680 --- /dev/null +++ b/checks/check_systemd.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 +# +# Copyright 2016 Canonical Ltd +# +# Author: Brad Marshall +# +# Based on check_upstart_job and https://zignar.net/2014/09/08/getting-started-with-dbus-python-systemd/ +# + +import dbus, sys + +service_arg = sys.argv[1] +service_name = "%s.service" % service_arg + +try: + bus = dbus.SystemBus() + systemd = bus.get_object('org.freedesktop.systemd1', '/org/freedesktop/systemd1') + manager = dbus.Interface(systemd, dbus_interface='org.freedesktop.systemd1.Manager') + try: + service_unit = manager.LoadUnit(service_name) + service_proxy = bus.get_object('org.freedesktop.systemd1', str(service_unit)) + service = dbus.Interface(service_proxy, dbus_interface='org.freedesktop.systemd1.Unit') + service_res = service_proxy.Get('org.freedesktop.systemd1.Unit','SubState', dbus_interface='org.freedesktop.DBus.Properties') + + if service_res == 'running': + print('OK: %s is running' % service_name) + sys.exit(0) + else: + print('CRITICAL: %s is not running' % service_name) + sys.exit(2) + + except dbus.DBusException as e: + print('CRITICAL: unable to find %s in systemd' % service_name) + 
sys.exit(2) + +except dbus.DBusException as e: + print('CRITICAL: unable to connect to system for %s' % service_name) + sys.exit(2) diff --git a/filebeat.pgp.key b/filebeat.pgp.key new file mode 100644 index 0000000..8b13696 --- /dev/null +++ b/filebeat.pgp.key @@ -0,0 +1,32 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1 + +mQENBFI3HsoBCADXDtbNJnxbPqB1vDNtCsqhe49vFYsZN9IOZsZXgp7aHjh6CJBD +A+bGFOwyhbd7at35jQjWAw1O3cfYsKAmFy+Ar3LHCMkV3oZspJACTIgCrwnkic/9 +CUliQe324qvObU2QRtP4Fl0zWcfb/S8UYzWXWIFuJqMvE9MaRY1bwUBvzoqavLGZ +j3SF1SPO+TB5QrHkrQHBsmX+Jda6d4Ylt8/t6CvMwgQNlrlzIO9WT+YN6zS+sqHd +1YK/aY5qhoLNhp9G/HxhcSVCkLq8SStj1ZZ1S9juBPoXV1ZWNbxFNGwOh/NYGldD +2kmBf3YgCqeLzHahsAEpvAm8TBa7Q9W21C8vABEBAAG0RUVsYXN0aWNzZWFyY2gg +KEVsYXN0aWNzZWFyY2ggU2lnbmluZyBLZXkpIDxkZXZfb3BzQGVsYXN0aWNzZWFy +Y2gub3JnPokBOAQTAQIAIgUCUjceygIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgEC +F4AACgkQ0n1mbNiOQrRzjAgAlTUQ1mgo3nK6BGXbj4XAJvuZDG0HILiUt+pPnz75 +nsf0NWhqR4yGFlmpuctgCmTD+HzYtV9fp9qW/bwVuJCNtKXk3sdzYABY+Yl0Cez/ +7C2GuGCOlbn0luCNT9BxJnh4mC9h/cKI3y5jvZ7wavwe41teqG14V+EoFSn3NPKm +TxcDTFrV7SmVPxCBcQze00cJhprKxkuZMPPVqpBS+JfDQtzUQD/LSFfhHj9eD+Xe +8d7sw+XvxB2aN4gnTlRzjL1nTRp0h2/IOGkqYfIG9rWmSLNlxhB2t+c0RsjdGM4/ +eRlPWylFbVMc5pmDpItrkWSnzBfkmXL3vO2X3WvwmSFiQbkBDQRSNx7KAQgA5JUl +zcMW5/cuyZR8alSacKqhSbvoSqqbzHKcUQZmlzNMKGTABFG1yRx9r+wa/fvqP6OT +RzRDvVS/cycws8YX7Ddum7x8uI95b9ye1/Xy5noPEm8cD+hplnpU+PBQZJ5XJ2I+ +1l9Nixx47wPGXeClLqcdn0ayd+v+Rwf3/XUJrvccG2YZUiQ4jWZkoxsA07xx7Bj+ +Lt8/FKG7sHRFvePFU0ZS6JFx9GJqjSBbHRRkam+4emW3uWgVfZxuwcUCn1ayNgRt +KiFv9jQrg2TIWEvzYx9tywTCxc+FFMWAlbCzi+m4WD+QUWWfDQ009U/WM0ks0Kww +EwSk/UDuToxGnKU2dQARAQABiQEfBBgBAgAJBQJSNx7KAhsMAAoJENJ9ZmzYjkK0 +c3MIAIE9hAR20mqJWLcsxLtrRs6uNF1VrpB+4n/55QU7oxA1iVBO6IFu4qgsF12J +TavnJ5MLaETlggXY+zDef9syTPXoQctpzcaNVDmedwo1SiL03uMoblOvWpMR/Y0j +6rm7IgrMWUDXDPvoPGjMl2q1iTeyHkMZEyUJ8SKsaHh4jV9wp9KmC8C+9CwMukL7 +vM5w8cgvJoAwsp3Fn59AxWthN3XJYcnMfStkIuWgR7U2r+a210W6vnUxU4oN0PmM +cursYPyeV0NX/KQeUeNMwGTFB6QHS/anRaGQewijkrYYoTNtfllxIu9XYmiBERQ/ 
+qPDlGRlOgVTd9xUfHFkzB52c70E= +=92oX +-----END PGP PUBLIC KEY BLOCK----- + diff --git a/snap-overlay/bin/set-default-config b/snap-overlay/bin/set-default-config index 4b5930a..2b3f358 100755 --- a/snap-overlay/bin/set-default-config +++ b/snap-overlay/bin/set-default-config @@ -59,3 +59,33 @@ snapctl set \ config.cleanup.delete-bridge=true \ config.cleanup.remove=true \ ; + +# Filebeat +snapctl set \ + config.logging.datatag="" \ + config.logging.host="localhost:5044" \ + config.logging.custom-config="$SNAP_COMMON/etc/filebeat/filebeat-microstack.yaml" \ + config.services.extra.enabled=false \ + config.services.extra.filebeat=false \ + ; + +# services won't start if disabled in install hook +# see https://github.com/snapcore/snapd/blob/53dd10a71d1754610eda2d3d776465b81b3281cd/wrappers/services.go#L139 +snapctl stop --disable ${SNAP_NAME}.filebeat + +# NRPE +snapctl set \ + config.alerting.custom-config="$SNAP_COMMON/etc/nrpe/nrpe-microstack.cfg" \ + config.services.extra.nrpe=false \ + ; + +snapctl stop --disable ${SNAP_NAME}.telegraf + +# Telegraf +snapctl set \ + config.monitoring.ipmi="" \ + config.monitoring.custom-config="$SNAP_COMMON/etc/telegraf/telegraf-microstack.conf" \ + config.services.extra.telegraf=false \ + ; + +snapctl stop --disable ${SNAP_NAME}.nrpe diff --git a/snap-overlay/snap-openstack.yaml b/snap-overlay/snap-openstack.yaml index d8bda03..3037601 100644 --- a/snap-overlay/snap-openstack.yaml +++ b/snap-overlay/snap-openstack.yaml @@ -56,6 +56,10 @@ setup: neutron.conf.d.rabbitmq.conf.j2: "{snap_common}/etc/neutron/neutron.conf.d/rabbitmq.conf" rabbitmq.conf.j2: "{snap_common}/etc/rabbitmq/rabbitmq.config" + # LMA stack templates + telegraf.conf.j2: "{snap_common}/etc/telegraf/telegraf-microstack.conf" + nrpe.cfg.j2: "{snap_common}/etc/nrpe/nrpe-microstack.cfg" + filebeat.yaml.j2: "{snap_common}/etc/filebeat/filebeat-microstack.yaml" chmod: "{snap_common}/instances": 0755 "{snap_common}/etc/microstack.rc": 0644 @@ -74,6 +78,11 @@ setup: 
dashboard_port: 'config.network.ports.dashboard' mysql_port: 'config.network.ports.mysql' rabbit_port: 'config.network.ports.rabbit' + logging_tag: 'config.logging.logging.tag' + logging_host: 'config.logging.host' + monitoring_tag: 'config.monitoring.tag' + monitoring_ipmi: 'config.monitoring.ipmi' + alerting_tag: 'config.alerting.tag' entry_points: keystone-manage: binary: "{snap}/bin/keystone-manage" @@ -312,3 +321,24 @@ entry_points: - "{snap_common}/etc/horizon/horizon.conf.d" templates: horizon.ini.j2: "{snap_common}/etc/horizon/uwsgi/snap/horizon.ini" + filebeat: + binary: "{snap}/bin/filebeat.sh" + type: simple + config-dirs: + - "{snap_common}/lma/filebeat" + templates: + filebeat.yml.j2: "{snap_common}/etc/filebeat/filebeat.yml" + nrpe: + binary: "{snap}/bin/nrpe.sh" + type: simple + config-dirs: + - "{snap_common}/lma/nrpe" + templates: + nrpe.conf.j2: "{snap_common}/etc/nrpe/nrpe.conf" + telegraf: + binary: "{snap}/bin/telegraf" + type: simple + config-dirs: + - "{snap_common}/lma/telegraf" + templates: + telegraf.conf.j2: "{snap_common}/etc/telegraf/telegraf.conf" diff --git a/snap-overlay/templates/filebeat.yaml.j2 b/snap-overlay/templates/filebeat.yaml.j2 new file mode 100644 index 0000000..577c94f --- /dev/null +++ b/snap-overlay/templates/filebeat.yaml.j2 @@ -0,0 +1,43 @@ +filebeat: + prospectors: + - paths: + - /var/log/syslog + - /var/log/*/*.log + - {{ snap_common }}/log + - {{ snap_common }}/log/mysql + - {{ snap_common }}/log/openvswitch + - {{ snap_common }}/log/rabbitmq + + input_type: log + exclude_files: ["/filebeat.*", ".*gz$"] + exclude_lines: [] + scan_frequency: 10s + harvester_buffer_size: 16384 + max_bytes: 10485760 + + registry_file: filebeat/registry + +logging: + to_syslog: true + to_files: false + level: info + metrics.enabled: false + files: + path: {{ snap_common }}/log + name: filebeat + keepfiles: 7 + permissions: 0644 + +output: + + logstash: + hosts: {{ ( logging_host.split(';') if logging_host else [] ) | tojson }} + + 
worker: 1 + compression_level: 3 + loadbalance: true + +{% if logging_tag %} +# if name is empty, hostname will be used +name: {{logging_tag}} +{% endif %} diff --git a/snap-overlay/templates/nrpe.cfg.j2 b/snap-overlay/templates/nrpe.cfg.j2 new file mode 100644 index 0000000..abc002f --- /dev/null +++ b/snap-overlay/templates/nrpe.cfg.j2 @@ -0,0 +1,56 @@ +log_facility=daemon +log_file={{ snap_common }}/log/nrpe.log + +server_port=5666 + +# WARNING: 0.0.0.0/0 is not supported, just comment out 'allowed_hosts' +#allowed_hosts=0.0.0.0/0 +#allowed_hosts=10.0.0.0/8,127.0.0.1 + +nrpe_user=root +nrpe_group=root +dont_blame_nrpe=0 +debug=0 +pid_file={{ snap_common }}/run/nrpe.pid + +# Additional configuration snippets are read from nrpe/nrpe.conf.d/ (see include_dir at the end of this file) +command[check_users]={{ snap }}/usr/lib/nagios/plugins/check_users -w 5 -c 10 +command[check_load]={{ snap }}/usr/lib/nagios/plugins/check_load -r -w 2.0,1.0,0.5 -c 4.0,2.0,1.0 +#command[check_sda1]={{ snap }}/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1 +command[check_all_fs]={{ snap }}/usr/lib/nagios/plugins/check_disk -l -X tmpfs -X squashfs -X proc -X sysfs -X devtmpfs -X lxcfs -X hugtlbfs +command[check_swap]={{ snap }}/usr/lib/nagios/plugins/check_swap -n ok -w 5 -c 1 + +command[check_zombie_procs]={{ snap }}/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z +command[check_total_procs]={{ snap }}/usr/lib/nagios/plugins/check_procs -w 220 -c 300 +command[check_rabbitmq_server]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.rabbitmq-server +command[check_cluster_server]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.cluster-server +#command[check_external_bridge]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.external-bridge +command[check_glance_api]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.glance-api +command[check_horizon_uwsgi]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py 
snap.microstack.horizon-uwsgi +command[check_keystone_uwsgi]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.keystone-uwsgi +command[check_libvirtd]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.libvirtd +command[check_memcached]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.memcached +command[check_mysqld]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.mysqld +command[check_neutron_api]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.neutron-api +command[check_neutron_dhcp_agent]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.neutron-dhcp-agent +command[check_neutron_l3_agent]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.neutron-l3-agent +command[check_neutron_metadata_agent]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.neutron-metadata-agent +command[check_neutron_openvswitch_agent]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.neutron-openvswitch-agent +command[check_nginx]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nginx +command[check_nova_api]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nova-api +command[check_nova_api_metadata]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nova-api-metadata +command[check_nova_compute]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nova-compute +command[check_nova_conductor]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nova-conductor +command[check_nova_scheduler]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nova-scheduler +command[check_nova_uwsgi]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.nova-uwsgi +command[check_ovs_vswitchd]=python3 {{ snap 
}}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.ovs-vswitchd +command[check_ovsdb_server]=python3 {{ snap }}/usr/lib/nagios/plugins/check_systemd.py snap.microstack.ovsdb-server + +#command[check_cpu_stats]={{ snap }}/usr/lib/nagios/plugins/check_cpu_stats.sh $arg1$ +#command[check_users]={{ snap }}/usr/lib/nagios/plugins/check_users $ARG1$ +#command[check_load]={{ snap }}/usr/lib/nagios/plugins/check_load $ARG1$ +#command[check_disk]={{ snap }}/usr/lib/nagios/plugins/check_disk $arg1$ +#command[check_swap]={{ snap }}/usr/lib/nagios/plugins/check_swap $arg1$ +#command[check_mem]={{ snap }}/usr/lib/nagios/plugins/custom_check_mem -n $arg1$ + +include_dir={{ snap_common }}/nrpe/nrpe.conf.d diff --git a/snap-overlay/templates/telegraf.conf.j2 b/snap-overlay/templates/telegraf.conf.j2 new file mode 100644 index 0000000..b498294 --- /dev/null +++ b/snap-overlay/templates/telegraf.conf.j2 @@ -0,0 +1,131 @@ +# Telegraf configuration + +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. + +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. + +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. + +# Global tags can be specified here in key="value" format. +[tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + +# Configuration for telegraf agent +[agent] + # Default data collection interval for all plugins + interval = "10s" + # Rounds collection interval to 'interval' + # ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + # Telegraf will cache metric_buffer_limit metrics for each output, and will + # flush this buffer on a successful write. + metric_buffer_limit = 10000 + + # Collection jitter is used to jitter the collection by a random amount. 
+ # Each plugin will sleep for a random time within jitter before collecting. + # This can be used to avoid many plugins querying things like sysfs at the + # same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + # Default data flushing interval for all outputs. You should not set this below + # interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + # Jitter the flush interval by a random amount. This is primarily to avoid + # large write spikes for users running a large number of telegraf instances. + # ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + # Run telegraf in debug mode + debug = false + # Run telegraf in quiet mode + quiet = false + # Override default hostname, if empty use os.Hostname() + hostname = "" + +############################################################################### +# OUTPUTS # +############################################################################### + +[[outputs.prometheus_client]] + listen = ":9103" + +############################################################################### +# INPUTS # +############################################################################### + +# Read metrics about cpu usage +[[inputs.cpu]] + # Whether to report per-cpu stats or not + percpu = false + # Whether to report total system cpu stats or not + totalcpu = true + # Comment this line if you want the raw CPU time metrics + drop = ["time_*"] + +# Read metrics about disk usage by mount point +[[inputs.disk]] + # By default, telegraf gather stats for all mountpoints. + # Setting mountpoints will restrict the stats to the specified mountpoints. + # mount_points=["/"] + +# Read metrics about disk IO by device +[[inputs.diskio]] + # By default, telegraf will gather stats for all devices including + # disk partitions. + # Setting devices will restrict the stats to the specified devices. 
+ # devices = ["sda", "sdb"] + # Uncomment the following line if you do not need disk serial numbers. + # skip_serial_number = true + +# Read metrics about memory usage +[[inputs.mem]] + # no configuration + +# Read metrics about network interface usage +[[inputs.net]] + # By default, telegraf gathers stats from any up interface (excluding loopback) + # Setting interfaces will tell it to gather these explicit interfaces, + # regardless of status. + # + # interfaces = ["eth0", ... ] + +# Read metrics about TCP status such as established, time wait etc and UDP sockets counts. +[[inputs.netstat]] + # no configuration + +# Read metrics about swap memory usage +[[inputs.swap]] + # no configuration + +# Read metrics about system load & uptime +[[inputs.system]] + # no configuration + +[[inputs.bond]] + # no configuration + +[[inputs.cgroup]] + # no configuration + +[[inputs.exec]] +commands = [ + "/usr/bin/awk '{ print $1 }' /proc/sys/fs/file-nr" +] +data_format = "value" + +{% if monitoring_ipmi %} +[[inputs.ipmi_sensor]] +path = "{{snap}}/usr/bin/ipmitool" +interval = "60s" +timeout = "60s" +metric_version = 2 +privilege = "ADMINISTRATOR" +{% if monitoring_ipmi != 'in-band' %} +servers = [ "{{ monitoring_ipmi }}" ] +{% endif %} +{% endif %} diff --git a/snap-wrappers/filebeat/filebeat b/snap-wrappers/filebeat/filebeat new file mode 100755 index 0000000..878b845 --- /dev/null +++ b/snap-wrappers/filebeat/filebeat @@ -0,0 +1,11 @@ +#!/bin/bash + +mkdir -p $SNAP_COMMON/var/filebeat +mkdir -p $SNAP_COMMON/var/log/filebeat +mkdir -p $SNAP_COMMON/etc/filebeat/filebeat.conf.d + +$SNAP/usr/share/filebeat/bin/filebeat -c "$(snapctl get config.logging.custom-config)" \ + -path.home $SNAP/usr/share/filebeat/ \ + -path.config $SNAP_COMMON/etc/filebeat/filebeat.conf.d \ + -path.data $SNAP_COMMON/var/filebeat \ + -path.logs $SNAP_COMMON/var/log/filebeat diff --git a/snap-wrappers/nrpe/nrpe b/snap-wrappers/nrpe/nrpe new file mode 100755 index 0000000..bf4c42d --- /dev/null +++ 
b/snap-wrappers/nrpe/nrpe @@ -0,0 +1,6 @@ +#!/bin/bash + +# this directory doesn't need to exist for nrpe to start +mkdir -p $SNAP_COMMON/nrpe/nrpe.conf.d + +$SNAP/usr/sbin/nrpe -c $(snapctl get config.alerting.custom-config) -d -f diff --git a/snap-wrappers/telegraf/telegraf b/snap-wrappers/telegraf/telegraf new file mode 100755 index 0000000..a12f6de --- /dev/null +++ b/snap-wrappers/telegraf/telegraf @@ -0,0 +1,8 @@ +#!/bin/bash + +mkdir -p $SNAP_COMMON/etc/telegraf/telegraf.d + +# FIXME: set -path.config to correct folder +$SNAP/usr/bin/telegraf --config "$(snapctl get config.monitoring.custom-config)" \ + --config-directory $SNAP_COMMON/etc/telegraf/telegraf.d \ + --pidfile $SNAP_COMMON/run/telegraf.pid diff --git a/snapcraft.yaml b/snapcraft.yaml index 4841747..323fa3f 100644 --- a/snapcraft.yaml +++ b/snapcraft.yaml @@ -367,6 +367,19 @@ apps: ovs-alternatives: command: bin/ovs-alternatives + filebeat: + # this is to avoid conflict with filebeat package + command: bin/filebeat + daemon: simple + + nrpe: + command: bin/nrpe + daemon: simple + + telegraf: + command: bin/telegraf + daemon: simple + parts: dpdk: plugin: make @@ -969,3 +982,62 @@ parts: requirements: - requirements.txt source: tools/cluster + + ### LMA stack ### + filebeat: + plugin: dump + source: ./snap-wrappers/filebeat + after: [ lma-build-prep, qemu, libvirt ] + stage-packages: [ filebeat ] + organize: + filebeat: bin/filebeat + + nrpe: + plugin: dump + source: ./snap-wrappers/nrpe + organize: + nrpe: bin/nrpe + after: [ lma-build-prep ] + stage-packages: [ nagios-nrpe-server, nagios-nrpe-plugin, monitoring-plugins, monitoring-plugins-basic, monitoring-plugins-common ] + + telegraf: + plugin: dump + source: ./snap-wrappers/telegraf + organize: + telegraf: bin/telegraf + # we use autotools plugin here because it brings git, make + # otherwise we'd have to add respective packages + after: [ lma-build-prep, qemu, libvirt ] + # See https://forum.snapcraft.io/t/patchelf-broke-my-binary/4928 + # 
and https://bugs.launchpad.net/snapcraft/+bug/1753995 + build-attributes: [no-patchelf] + stage-packages: [ telegraf, iproute2, bridge-utils, ethtool, fancontrol, libatm1, ipmitool, freeipmi-tools ] + + lma-build-prep: + plugin: nil + source: "" + override-build: | + cat ../../../project/filebeat.pgp.key | apt-key add - + cat ../../../project/telegraf.pgp.key | apt-key add - + apt-get install apt-transport-https + echo 'deb https://artifacts.elastic.co/packages/5.x/apt stable main' > /etc/apt/sources.list.d/elastic-5.x.list + echo 'deb http://ppa.launchpad.net/telegraf-devs/ppa/ubuntu bionic main' > /etc/apt/sources.list.d/telegraf.x.list + # this needs to be re-worked in order to be built in-place + echo 'deb http://ppa.launchpad.net/nikolay.vinogradov/nrpe-root/ubuntu bionic main' > /etc/apt/sources.list.d/nrpe-root.list + sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F767C80E0FC5C835 + apt-get update --allow-unauthenticated + + #lma-configs: + # plugin: dump + # source: ./lma + # after: [ lma-build-prep ] + # organize: + # 'config/nrpe': etc/nrpe + # 'config/telegraf': etc/telegraf + + + checks: + plugin: dump + source: ./checks + organize: + check_systemd.py: usr/lib/nagios/plugins/check_systemd.py diff --git a/telegraf.pgp.key b/telegraf.pgp.key new file mode 100644 index 0000000..71a222d --- /dev/null +++ b/telegraf.pgp.key @@ -0,0 +1,28 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: SKS 1.1.6 +Comment: Hostname: keyserver.ubuntu.com + +mQINBFcVSuIBEAC80aj0tAQ6+NhGV/bkSwu6Oj+BpDR50Be3uBv7ttdtvChL5zHTnaxjdK3h +LKSyrDLlmSOkffQ2uO7CxvqeF09MsHhyvrDDx0EY54//xxoAB++PoB2OQqmqldg3Al5Hp4Dz +rllV5CIX5PD8NGX8UpO3HXk5wEwn9G81l8cia3vPveU82EIkHMiJGpk6+L86OMlwXzxkSI3M +xXgNFKQc+ELDYLvGSseYC9vPN3kdmFoo/UjznPPE4fxr4bXit3N8Abl1jYjBa0x6SWkK1BAb +s8w3BXtvyk90z9Oyme69wPD4zAYfFp+kN2nDmTDBMtNCyMu9oatdI5SukMNK4Lcm8eAE6VNs +04j7BKvGk9+17M8WP9Pw8nIisOwScS9gUlJlLUpnBaJ+sxoOvGQ4mzZxYMKzJh0E58aEX3bS 
+AyzQfsae8bZLNOTcgotyzzIDJFF9npzu3wmKjeOt/706p4LiDqKUbQK6cI+QcJ/y80ZUK8pB +M043ttSHWLmTBFX2drp6zQGae9+02fX89ZD+5c+MPlubJMYCCKkvQT4OssHfC+dVDQ66rwUy +OObrzsVgikdpIxQVitL3J+Dms56xAkdFfoo+qdxxdv9S/eakc5mfavc/4WVvmFDaJiqJnJRR +Ryw1zApRtuweEEdVn8niy1mahoKpWaw1pTI4AazjWI6xJH1JyQARAQABtB9MYXVuY2hwYWQg +UFBBIGZvciBUZWxlZ3JhZiBEZXZziQI4BBMBAgAiBQJXFUriAhsDBgsJCAcDAgYVCAIJCgsE +FgIDAQIeAQIXgAAKCRDxDL4ByUQG9UgbEACa4IzdeYxH/S5I6MrZfvWNo/JTZ/MZWDD+QlMW +60ThAemCUSE+NJvZZ1q7ovGFpYnHJT9GQXOwJAX1quDUqyM1uXNmLlOyIVNnmjUTINoLhw2V +iC8E7dMWC9w4Na2fKezmNHH00kNl43ncstIjjZ3pLnDGYm1y0ItiCUcTRgHhx2cUZ/vStz1S +Pdqj4P3i8vuspoYJ2T3VPlM/0G+u9Yjuy3Uzu9RugOyO3UJPoi3+4O2VTNosSBy5MILVCp49 +eigyFVGpq5sT/c86qd1zqmsNWEubrlzDfETS4LMj9epr46ZKPXGQkeryt1m2Oe0HkIdNZ+IQ +5p+i9fnEy7/1uKTXWQYsg2UWsLA2PvTvwY8JxxMhUFgv12q2w7STntqJyi9PLItYNtbtKoS3 +XZCCMqQLCWMXHY+2ol6rRSfs06H/wzlR8LjDaEXkDVuDmqMtcbgTboZYblsGxst7I/Y4Wgfi +J52uiIyobQ69uJbG0XeRTLZ3WyrBkopEsTX/+sQjVqbADXYU4hBVDgnCf2uN/5dcwSEvDj8/ ++WsToAfEJkscRBsQjTLVzf+eFqHLrbqz/yoYIqBc//IJMBSbxIf5mrOHHLdbOuMCB6PVwpTI +vLFOSDNPuVDX+S1goA8KJTnXpm8jWDynn3XaXx3AlYw4iZ0ETSgQLQLRd6JuPOEGXsGdBA== +=ufaX +-----END PGP PUBLIC KEY BLOCK----- + diff --git a/tools/init/init/main.py b/tools/init/init/main.py index 666f6f6..14356f3 100644 --- a/tools/init/init/main.py +++ b/tools/init/init/main.py @@ -127,6 +127,7 @@ def init() -> None: questions.KeyPair(), questions.SecurityRules(), questions.PostSetup(), + questions.ExtraServicesQuestion(), ] for question in question_list: diff --git a/tools/init/init/questions/__init__.py b/tools/init/init/questions/__init__.py index 414b030..2b5d371 100644 --- a/tools/init/init/questions/__init__.py +++ b/tools/init/init/questions/__init__.py @@ -780,3 +780,80 @@ class PostSetup(Question): check('snapctl', 'set', 'initialized=true') log.info('Complete. 
Marked microstack as initialized!') + + +class SimpleServiceQuestion(Question): + + def yes(self, answer: str) -> None: + log.info('enabling and starting ' + self.__class__.__name__) + + for service in self.services: + check('snapctl', 'start', '--enable', service) + + log.info(self.__class__.__name__ + ' enabled') + + def no(self, answer): + for service in self.services: + check('snapctl', 'stop', '--disable', service) + + +class ExtraServicesQuestion(Question): + + _type = 'boolean' + _question = 'Do you want to setup extra services?' + config_key = 'config.services.extra.enabled' + interactive = True + + def yes(self, answer: bool): + questions = [ + Filebeat(), + Telegraf(), + Nrpe(), + ] + + for question in questions: + if not self.interactive: + question.interactive = False + question.ask() + + def no(self, answer: bool): + pass + + +class Filebeat(SimpleServiceQuestion): + _type = 'boolean' + _question = 'Do you want to enable Filebeat?' + config_key = 'config.services.extra.filebeat' + interactive = True + + @property + def services(self): + return [ + '{SNAP_INSTANCE_NAME}.filebeat'.format(**_env) + ] + + +class Telegraf(SimpleServiceQuestion): + _type = 'boolean' + _question = 'Do you want to enable Telegraf?' + config_key = 'config.services.extra.telegraf' + interactive = True + + @property + def services(self): + return [ + '{SNAP_INSTANCE_NAME}.telegraf'.format(**_env) + ] + + +class Nrpe(SimpleServiceQuestion): + _type = 'boolean' + _question = 'Do you want to enable NRPE?' + config_key = 'config.services.extra.nrpe' + interactive = True + + @property + def services(self): + return [ + '{SNAP_INSTANCE_NAME}.nrpe'.format(**_env) + ]