2014-09-16 16:15:49 -06:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
|
|
# implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
""" Persister
|
|
|
|
The Persister reads metrics and alarms from Kafka and then stores them
|
|
|
|
in InfluxDB.
|
|
|
|
|
|
|
|
Start the persister as stand-alone process by running 'persister.py
|
|
|
|
--config-file <config file>'
|
|
|
|
|
|
|
|
Also able to use Openstack service to start the persister.
|
|
|
|
"""
|
2014-10-03 09:38:12 -06:00
|
|
|
|
|
|
|
import abc
|
2014-10-02 08:33:19 -06:00
|
|
|
from datetime import datetime
|
2014-09-16 16:15:49 -06:00
|
|
|
import json
|
2014-11-19 15:20:27 -07:00
|
|
|
import os
|
2014-10-03 09:38:12 -06:00
|
|
|
import six
|
2014-09-16 16:15:49 -06:00
|
|
|
import sys
|
2014-10-02 08:33:19 -06:00
|
|
|
import threading
|
|
|
|
import urllib
|
|
|
|
|
|
|
|
from influxdb import InfluxDBClient
|
|
|
|
from kafka import KafkaClient
|
|
|
|
from kafka import SimpleConsumer
|
2014-09-16 16:15:49 -06:00
|
|
|
from oslo.config import cfg
|
|
|
|
|
|
|
|
from openstack.common import log
|
|
|
|
from openstack.common import service as os_service
|
|
|
|
import service
|
|
|
|
|
2014-10-02 08:33:19 -06:00
|
|
|
|
2014-09-16 16:15:49 -06:00
|
|
|
LOG = log.getLogger(__name__)

# Kafka configuration: the broker URI plus, for each of the two consumers
# (alarm state history and metrics), the consumer group/topic/client ids and
# the batching knobs (batch size, max wait) that control how often buffered
# points are flushed to InfluxDB.
kafka_opts = [cfg.StrOpt('uri'), cfg.StrOpt('alarm_history_group_id'),
              cfg.StrOpt('alarm_history_topic'),
              cfg.StrOpt('alarm_history_consumer_id'),
              cfg.StrOpt('alarm_history_client_id'),
              cfg.IntOpt('alarm_batch_size'),
              cfg.IntOpt('alarm_max_wait_time_seconds'),
              cfg.StrOpt('metrics_group_id'), cfg.StrOpt('metrics_topic'),
              cfg.StrOpt('metrics_consumer_id'),
              cfg.StrOpt('metrics_client_id'),
              cfg.IntOpt('metrics_batch_size'),
              cfg.IntOpt('metrics_max_wait_time_seconds')]

kafka_group = cfg.OptGroup(name='kafka', title='kafka')

cfg.CONF.register_group(kafka_group)
cfg.CONF.register_opts(kafka_opts, kafka_group)

# InfluxDB connection configuration.
influxdb_opts = [cfg.StrOpt('database_name'), cfg.StrOpt('ip_address'),
                 cfg.StrOpt('port'), cfg.StrOpt('user'),
                 cfg.StrOpt('password')]

influxdb_group = cfg.OptGroup(name='influxdb', title='influxdb')

cfg.CONF.register_group(influxdb_group)
cfg.CONF.register_opts(influxdb_opts, influxdb_group)

# Parse the command line (e.g. --config-file <config file>).
cfg.CONF(sys.argv[1:])

log_levels = (cfg.CONF.default_log_levels)

cfg.set_defaults(log.log_opts, default_log_levels=log_levels)

# Fixed typo in the logging product name: was "monasca-perister".
log.setup("monasca-persister")
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Create the metric and alarm persister threads and start them."""

    persisters = [MetricPersister(cfg.CONF), AlarmPersister(cfg.CONF)]

    for persister in persisters:
        persister.start()
|
2014-09-16 16:15:49 -06:00
|
|
|
|
|
|
|
|
|
|
|
class Persister(os_service.Service):
    """Adapter that runs the persister under the Openstack service framework.

    Delegates all real work to main(), which starts the consumer threads.
    """

    def __init__(self, threads=1):

        super(Persister, self).__init__(threads)

    def start(self):
        """Start the persister threads; exit the process on any failure."""

        try:

            main()

            banner = ("*********************************************"
                      "*************")
            LOG.info(banner)
            LOG.info("Persister started successfully")
            LOG.info(banner)

        except Exception:
            LOG.exception('Persister encountered fatal error. Shutting down.')
            # Hard exit so the whole process dies, not just this thread.
            os._exit(1)
|
2014-09-16 16:15:49 -06:00
|
|
|
|
|
|
|
|
2014-10-03 09:38:12 -06:00
|
|
|
@six.add_metaclass(abc.ABCMeta)
class AbstractPersister(threading.Thread):
    """Base class for threads that copy Kafka messages into InfluxDB.

    Subclasses implement process_message() to convert one Kafka message into
    an InfluxDB JSON point dict. Points are buffered and written in batches:
    a flush happens when the buffer reaches the configured batch size, or
    when more than the configured wait time has passed since the last flush.
    Kafka offsets are committed only after a successful InfluxDB write, so
    messages are not lost if a write fails.
    """

    def __init__(self, consumer, influxdb_client, max_wait_time_secs,
                 batch_size):
        """Store the consumer, DB client and batching parameters.

        :param consumer: kafka SimpleConsumer to read messages from.
        :param influxdb_client: InfluxDBClient to write points to.
        :param max_wait_time_secs: max seconds between flushes.
        :param batch_size: number of buffered points that triggers a flush.
        """

        super(AbstractPersister, self).__init__()

        self._consumer = consumer
        self._influxdb_client = influxdb_client
        self._max_wait_time_secs = max_wait_time_secs
        self._batch_size = batch_size

        self._json_body = []
        self._last_flush = datetime.now()

    @abc.abstractmethod
    def process_message(self, message):
        """Convert one Kafka message into an InfluxDB JSON point dict."""
        pass

    def _flush(self):
        # Was a nested function inside run() called as flush(self); promoted
        # to a private method. Offsets are committed only after the write
        # succeeds; the timestamp is only reset when something was written,
        # exactly as in the original nested implementation.
        if self._json_body:
            self._influxdb_client.write_points(self._json_body)
            self._consumer.commit()
            self._json_body = []
            self._last_flush = datetime.now()

    def run(self):
        """Consume messages forever; exit the process on a fatal error."""

        try:

            while True:

                delta_time = datetime.now() - self._last_flush
                if delta_time.seconds > self._max_wait_time_secs:
                    self._flush()

                for message in self._consumer:
                    self._json_body.append(self.process_message(message))
                    if len(self._json_body) % self._batch_size == 0:
                        self._flush()

        except Exception:
            LOG.exception(
                'Persister encountered fatal exception processing messages. Shutting down all threads and exiting')
            # Hard exit: take down every persister thread, not just this one.
            os._exit(1)
|
2014-09-16 16:15:49 -06:00
|
|
|
|
|
|
|
|
2014-10-03 09:38:12 -06:00
|
|
|
class AlarmPersister(AbstractPersister):
    """Class for persisting alarms.
    """

    def __init__(self, conf):
        """Wire up the alarm-history Kafka consumer and the InfluxDB client."""

        kafka = KafkaClient(conf.kafka.uri)
        consumer = SimpleConsumer(kafka, conf.kafka.alarm_history_group_id,
                                  conf.kafka.alarm_history_topic,
                                  auto_commit=False, iter_timeout=1)

        influxdb_client = InfluxDBClient(conf.influxdb.ip_address,
                                         conf.influxdb.port,
                                         conf.influxdb.user,
                                         conf.influxdb.password,
                                         conf.influxdb.database_name)

        super(AlarmPersister, self).__init__(
            consumer, influxdb_client,
            conf.kafka.alarm_max_wait_time_seconds,
            conf.kafka.alarm_batch_size)

    def process_message(self, message):
        """Turn one alarm state transition message into an InfluxDB point."""

        raw_value = message.message.value
        LOG.debug(raw_value.decode('utf8'))

        decoded = json.loads(raw_value)
        LOG.debug(json.dumps(decoded, sort_keys=True, indent=4))

        transition = decoded['alarm-transitioned']

        actions_enabled = transition['actionsEnabled']
        LOG.debug('actions enabled: %s', actions_enabled)

        alarm_description = transition['alarmDescription']
        LOG.debug('alarm description: %s', alarm_description)

        alarm_id = transition['alarmId']
        LOG.debug('alarm id: %s', alarm_id)

        alarm_definition_id = transition['alarmDefinitionId']
        LOG.debug('alarm definition id: %s', alarm_definition_id)

        metrics = transition['metrics']
        LOG.debug('metrics: %s', metrics)

        alarm_name = transition['alarmName']
        LOG.debug('alarm name: %s', alarm_name)

        new_state = transition['newState']
        LOG.debug('new state: %s', new_state)

        old_state = transition['oldState']
        LOG.debug('old state: %s', old_state)

        state_change_reason = transition['stateChangeReason']
        LOG.debug('state change reason: %s', state_change_reason)

        tenant_id = transition['tenantId']
        LOG.debug('tenant id: %s', tenant_id)

        time_stamp = transition['timestamp']
        LOG.debug('time stamp: %s', time_stamp)

        # Column order must stay aligned with the point value order below.
        columns = ["time", "reason_data", "tenant_id", "alarm_id",
                   "alarm_definition_id", "metrics", "old_state",
                   "new_state", "reason"]
        point = [time_stamp,
                 '{}',
                 tenant_id.encode('utf8'),
                 alarm_id.encode('utf8'),
                 alarm_definition_id.encode('utf8'),
                 json.dumps(metrics, ensure_ascii=False).encode('utf8'),
                 old_state.encode('utf8'),
                 new_state.encode('utf8'),
                 state_change_reason.encode('utf8')]

        data = {"points": [point],
                "name": 'alarm_state_history',
                "columns": columns}

        LOG.debug(data)

        return data
|
|
|
|
|
|
|
|
|
|
|
|
class MetricPersister(AbstractPersister):
    """Class for persisting metrics.
    """

    def __init__(self, conf):
        """Wire up the metrics Kafka consumer and the InfluxDB client."""

        kafka = KafkaClient(conf.kafka.uri)
        consumer = SimpleConsumer(kafka, conf.kafka.metrics_group_id,
                                  conf.kafka.metrics_topic, auto_commit=False,
                                  iter_timeout=1)

        influxdb_client = InfluxDBClient(conf.influxdb.ip_address,
                                         conf.influxdb.port,
                                         conf.influxdb.user,
                                         conf.influxdb.password,
                                         conf.influxdb.database_name)

        super(MetricPersister, self).__init__(
            consumer, influxdb_client,
            conf.kafka.metrics_max_wait_time_seconds,
            conf.kafka.metrics_batch_size)

    def process_message(self, message):
        """Turn one metric message into an InfluxDB point.

        The series name encodes metric name, tenant, region and dimensions as
        a URL-quoted query-string-like identifier.
        """

        raw_value = message.message.value
        LOG.debug(raw_value.decode('utf8'))

        decoded = json.loads(raw_value)
        LOG.debug(json.dumps(decoded, sort_keys=True, indent=4))

        metric = decoded['metric']

        metric_name = metric['name']
        LOG.debug('name: %s', metric_name)

        creation_time = decoded['creation_time']
        LOG.debug('creation time: %s', creation_time)

        region = decoded['meta']['region']
        LOG.debug('region: %s', region)

        tenant_id = decoded['meta']['tenantId']
        LOG.debug('tenant id: %s', tenant_id)

        dimensions = {}
        if 'dimensions' in metric:
            for dimension_name in metric['dimensions']:
                dimensions[dimension_name] = (
                    metric['dimensions'][dimension_name])
                LOG.debug('dimension: %s : %s', dimension_name,
                          dimensions[dimension_name])

        time_stamp = metric['timestamp']
        LOG.debug('timestamp %s', time_stamp)

        value = metric['value']
        LOG.debug('value: %s', value)

        def _quote(text):
            # UTF-8 encode then fully percent-encode (no safe characters).
            return urllib.quote(text.encode('utf8'), safe='')

        url_encoded_serie_name = (_quote(metric_name) + '?' +
                                  _quote(tenant_id) + '&' +
                                  _quote(region))

        for dimension_name in dimensions:
            url_encoded_serie_name += ('&' + _quote(dimension_name) + '=' +
                                       _quote(dimensions[dimension_name]))

        LOG.debug("url_encoded_serie_name: %s", url_encoded_serie_name)

        data = {"points": [[value, time_stamp]],
                "name": url_encoded_serie_name, "columns": ["value", "time"]}

        LOG.debug(data)

        return data
|
2014-09-16 16:15:49 -06:00
|
|
|
|
|
|
|
|
2014-09-29 09:34:48 -06:00
|
|
|
def main_service():
    """Method to use with Openstack service.
    """

    service.prepare_service()

    svc_launcher = os_service.ServiceLauncher()
    svc_launcher.launch_service(Persister())
    svc_launcher.wait()
|
|
|
|
|
|
|
|
# Used if run without Openstack service.
|
|
|
|
if __name__ == "__main__":
    # main() starts the persister threads and returns None, so the process
    # exit status here is 0; the started threads continue running.
    sys.exit(main())
|