Increase throughput of persister

Add batching to InfluxDB writes to improve throughput.

Change-Id: Ia173f55726cb11245f0bcf4580f1af8129c23aa3
Deklan Dieterly
2014-10-02 08:33:19 -06:00
parent a58acce9b4
commit d3acf12034
6 changed files with 164 additions and 89 deletions
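
In outline, each persister thread now buffers decoded messages in json_body and flushes them to InfluxDB either when a full batch has accumulated or when a maximum wait time has elapsed, committing the Kafka offsets only after a write. A minimal, self-contained sketch of that batch-and-flush loop is below; persist, flush_cb and commit_cb are illustrative stand-ins (not names from this change) for InfluxDBClient.write_points() and the SimpleConsumer commit(), with the thresholds coming from the new batch_size and max_wait_time_seconds options.

from datetime import datetime


def persist(consumer, flush_cb, commit_cb, batch_size=1000,
            max_wait_seconds=30):
    json_body = []        # points waiting to be written to InfluxDB
    count = 0
    last_flush = datetime.now()

    while True:
        # Time-based flush: never let a partial batch sit longer than
        # max_wait_seconds, even when traffic is low.
        if (datetime.now() - last_flush).seconds > max_wait_seconds:
            if json_body:
                flush_cb(json_body)
                commit_cb()
                json_body = []
                count = 0
            last_flush = datetime.now()

        # Size-based flush: drain the consumer (which must return control
        # periodically, e.g. SimpleConsumer(..., iter_timeout=1)) and write
        # a full batch as soon as it is assembled.
        for message in consumer:
            json_body.append(message)
            count += 1
            if count % batch_size == 0:
                flush_cb(json_body)
                commit_cb()
                json_body = []
                count = 0
                last_flush = datetime.now()

Because offsets are committed only after a successful write, a crash re-reads at most the current batch instead of losing it.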

.gitignore

@@ -5,3 +5,4 @@ target/
 debs/
 logs/
 .idea/
+*.pyc


@@ -10,10 +10,14 @@ alarm_history_group_id = 1_alarm-state-transitions
 alarm_history_topic = alarm-state-transitions
 alarm_history_consumer_id = 1
 alarm_history_client_id = 1
+alarm_batch_size = 1000
+alarm_max_wait_time_seconds = 30
 metrics_group_id = 1_metrics
 metrics_topic = metrics
 metrics_consumer_id = 1
 metrics_client_id = 1
+metrics_batch_size = 1000
+metrics_max_wait_time_seconds = 30
 [influxdb]
 database_name = test


@@ -23,19 +23,22 @@
 Also able to use Openstack service to start the persister.
 """
-import threading
-from kafka import KafkaClient, SimpleConsumer
-from influxdb import InfluxDBClient
+from datetime import datetime
 import json
-import urllib
 import sys
+import threading
+import urllib
+from influxdb import InfluxDBClient
+from kafka import KafkaClient
+from kafka import SimpleConsumer
 from oslo.config import cfg
 from openstack.common import log
 from openstack.common import service as os_service
 import service
 LOG = log.getLogger(__name__)
 kafka_opts = [
@@ -44,10 +47,14 @@ kafka_opts = [
     cfg.StrOpt('alarm_history_topic'),
     cfg.StrOpt('alarm_history_consumer_id'),
     cfg.StrOpt('alarm_history_client_id'),
+    cfg.IntOpt('alarm_batch_size'),
+    cfg.IntOpt('alarm_max_wait_time_seconds'),
     cfg.StrOpt('metrics_group_id'),
     cfg.StrOpt('metrics_topic'),
     cfg.StrOpt('metrics_consumer_id'),
-    cfg.StrOpt('metrics_client_id')
+    cfg.StrOpt('metrics_client_id'),
+    cfg.IntOpt('metrics_batch_size'),
+    cfg.IntOpt('metrics_max_wait_time_seconds')
 ]
 kafka_group = cfg.OptGroup(name='kafka',
@@ -108,68 +115,104 @@ class AlarmPersister(threading.Thread):
     def run(self):
+        count = 0
+        json_body = []
+        last_flush = datetime.now()
         try:
             kafka = KafkaClient(self.conf.kafka.uri)
             consumer = SimpleConsumer(kafka,
                                       self.conf.kafka.alarm_history_group_id,
                                       self.conf.kafka.alarm_history_topic,
-                                      auto_commit=True)
+                                      auto_commit=False, iter_timeout=1)
             influxdb_client = InfluxDBClient(self.conf.influxdb.ip_address,
                                              self.conf.influxdb.port,
                                              self.conf.influxdb.user,
                                              self.conf.influxdb.password,
                                              self.conf.influxdb.database_name)
-            for message in consumer:
-                LOG.debug(message.message.value.decode('utf8'))
-                decoded = json.loads(message.message.value)
-                LOG.debug(json.dumps(decoded, sort_keys=True, indent=4))
-                actions_enabled = decoded['alarm-transitioned'][
-                    'actionsEnabled']
-                LOG.debug('actions enabled: %s', actions_enabled)
-                alarm_description = decoded['alarm-transitioned'][
-                    'alarmDescription']
-                LOG.debug('alarm description: %s', alarm_description)
-                alarm_id = decoded['alarm-transitioned']['alarmId']
-                LOG.debug('alarm id: %s', alarm_id)
-                alarm_name = decoded['alarm-transitioned']['alarmName']
-                LOG.debug('alarm name: %s', alarm_name)
-                new_state = decoded['alarm-transitioned']['newState']
-                LOG.debug('new state: %s', new_state)
-                old_state = decoded['alarm-transitioned']['oldState']
-                LOG.debug('old state: %s', old_state)
-                state_changeReason = decoded['alarm-transitioned'][
-                    'stateChangeReason']
-                LOG.debug('state change reason: %s', state_changeReason)
-                tenant_id = decoded['alarm-transitioned']['tenantId']
-                LOG.debug('tenant id: %s', tenant_id)
-                time_stamp = decoded['alarm-transitioned']['timestamp']
-                LOG.debug('time stamp: %s', time_stamp)
-                json_body = [
-                    {"points": [
-                        [time_stamp, '{}', tenant_id.encode('utf8'),
-                         alarm_id.encode('utf8'), old_state.encode('utf8'),
-                         new_state.encode('utf8'),
-                         state_changeReason.encode('utf8')]],
-                     "name": 'alarm_state_history',
-                     "columns": ["time", "reason_data", "tenant_id",
-                                 "alarm_id", "old_state", "new_state",
-                                 "reason"]}]
-                influxdb_client.write_points(json_body)
+            while (True):
+                delta_time = datetime.now() - last_flush
+                if (delta_time.seconds >
+                        self.conf.kafka.alarm_max_wait_time_seconds):
+                    if json_body:
+                        influxdb_client.write_points(json_body)
+                        consumer.commit()
+                        last_flush = datetime.now()
+                        count = 0
+                        json_body = []
+                for message in consumer:
+                    LOG.debug(message.message.value.decode('utf8'))
+                    decoded = json.loads(message.message.value)
+                    LOG.debug(
+                        json.dumps(decoded, sort_keys=True, indent=4))
+                    actions_enabled = decoded['alarm-transitioned'][
+                        'actionsEnabled']
+                    LOG.debug('actions enabled: %s', actions_enabled)
+                    alarm_description = decoded['alarm-transitioned'][
+                        'alarmDescription']
+                    LOG.debug('alarm description: %s', alarm_description)
+                    alarm_id = decoded['alarm-transitioned']['alarmId']
+                    LOG.debug('alarm id: %s', alarm_id)
+                    alarm_definition_id = decoded['alarm-transitioned'][
+                        'alarmDefinitionId']
+                    LOG.debug('alarm definition id: %s', alarm_definition_id)
+                    metrics = decoded['alarm-transitioned']['metrics']
+                    LOG.debug('metrics: %s', metrics)
+                    alarm_name = decoded['alarm-transitioned']['alarmName']
+                    LOG.debug('alarm name: %s', alarm_name)
+                    new_state = decoded['alarm-transitioned']['newState']
+                    LOG.debug('new state: %s', new_state)
+                    old_state = decoded['alarm-transitioned']['oldState']
+                    LOG.debug('old state: %s', old_state)
+                    state_change_reason = decoded['alarm-transitioned'][
+                        'stateChangeReason']
+                    LOG.debug('state change reason: %s',
+                              state_change_reason)
+                    tenant_id = decoded['alarm-transitioned']['tenantId']
+                    LOG.debug('tenant id: %s', tenant_id)
+                    time_stamp = decoded['alarm-transitioned']['timestamp']
+                    LOG.debug('time stamp: %s', time_stamp)
+                    data = {"points": [
+                        [time_stamp, '{}', tenant_id.encode('utf8'),
+                         alarm_id.encode('utf8'),
+                         alarm_definition_id.encode('utf8'),
+                         json.dumps(metrics).encode('utf8'),
+                         old_state.encode('utf8'),
+                         new_state.encode('utf8'),
+                         state_change_reason.encode('utf8')]],
+                        "name": 'alarm_state_history',
+                        "columns": ["time", "reason_data", "tenant_id",
+                                    "alarm_id", "alarm_definition_id",
+                                    "metrics", "old_state",
+                                    "new_state",
+                                    "reason"]}
+                    LOG.debug(data)
+                    json_body.append(data)
+                    count += 1
+                    if count % self.conf.kafka.alarm_batch_size == 0:
+                        influxdb_client.write_points(json_body)
+                        consumer.commit()
+                        last_flush = datetime.now()
+                        count = 0
+                        json_body = []
         except Exception:
             LOG.exception(
@@ -187,13 +230,17 @@ class MetricPersister(threading.Thread):
     def run(self):
+        count = 0
+        json_body = []
+        last_flush = datetime.now()
         try:
             kafka = KafkaClient(self.conf.kafka.uri)
             consumer = SimpleConsumer(kafka,
                                       self.conf.kafka.metrics_group_id,
                                       self.conf.kafka.metrics_topic,
-                                      auto_commit=True)
+                                      auto_commit=False, iter_timeout=1)
             influxdb_client = InfluxDBClient(self.conf.influxdb.ip_address,
                                              self.conf.influxdb.port,
@@ -201,63 +248,86 @@ class MetricPersister(threading.Thread):
                                              self.conf.influxdb.password,
                                              self.conf.influxdb.database_name)
-            for message in consumer:
-                LOG.debug(message.message.value.decode('utf8'))
-                decoded = json.loads(message.message.value)
-                LOG.debug(json.dumps(decoded, sort_keys=True, indent=4))
-                metric_name = decoded['metric']['name']
-                LOG.debug('name: %s', metric_name)
-                creation_time = decoded['creation_time']
-                LOG.debug('creation time: %s', creation_time)
-                region = decoded['meta']['region']
-                LOG.debug('region: %s', region)
-                tenant_id = decoded['meta']['tenantId']
-                LOG.debug('tenant id: %s', tenant_id)
-                dimensions = {}
-                if 'dimensions' in decoded['metric']:
-                    for dimension_name in decoded['metric']['dimensions']:
-                        dimensions[dimension_name] = (
-                            decoded['metric']['dimensions'][dimension_name])
-                        LOG.debug('dimension: %s : %s', dimension_name,
-                                  dimensions[dimension_name])
-                time_stamp = decoded['metric']['timestamp']
-                LOG.debug('timestamp %s', time_stamp)
-                value = decoded['metric']['value']
-                LOG.debug('value: %s', value)
-                url_encoded_serie_name = (
-                    urllib.quote(metric_name.encode('utf8'), safe='')
-                    + '?' + urllib.quote(tenant_id.encode('utf8'), safe='')
-                    + '&' + urllib.quote(region.encode('utf8'), safe=''))
-                for dimension_name in dimensions:
-                    url_encoded_serie_name += ('&'
-                        + urllib.quote(
-                            dimension_name.encode('utf8'), safe='')
-                        + '='
-                        + urllib.quote(
-                            dimensions[dimension_name].encode('utf8'), safe=''))
-                LOG.debug("url_encoded_serie_name: %s", url_encoded_serie_name)
-                json_body = [
-                    {"points": [[value, time_stamp]],
-                     "name": url_encoded_serie_name,
-                     "columns": ["value", "time"]}]
-                LOG.debug(json_body)
-                influxdb_client.write_points(json_body)
+            while (True):
+                delta_time = datetime.now() - last_flush
+                if (delta_time.seconds >
+                        self.conf.kafka.metrics_max_wait_time_seconds):
+                    if json_body:
+                        influxdb_client.write_points(json_body)
+                        consumer.commit()
+                        last_flush = datetime.now()
+                        count = 0
+                        json_body = []
+                for message in consumer:
+                    LOG.debug(message.message.value.decode('utf8'))
+                    decoded = json.loads(message.message.value)
+                    LOG.debug(
+                        json.dumps(decoded, sort_keys=True, indent=4))
+                    metric_name = decoded['metric']['name']
+                    LOG.debug('name: %s', metric_name)
+                    creation_time = decoded['creation_time']
+                    LOG.debug('creation time: %s', creation_time)
+                    region = decoded['meta']['region']
+                    LOG.debug('region: %s', region)
+                    tenant_id = decoded['meta']['tenantId']
+                    LOG.debug('tenant id: %s', tenant_id)
+                    dimensions = {}
+                    if 'dimensions' in decoded['metric']:
+                        for dimension_name in decoded['metric'][
+                                'dimensions']:
+                            dimensions[dimension_name] = (
+                                decoded['metric']['dimensions'][
+                                    dimension_name])
+                            LOG.debug('dimension: %s : %s', dimension_name,
+                                      dimensions[dimension_name])
+                    time_stamp = decoded['metric']['timestamp']
+                    LOG.debug('timestamp %s', time_stamp)
+                    value = decoded['metric']['value']
+                    LOG.debug('value: %s', value)
+                    url_encoded_serie_name = (
+                        urllib.quote(metric_name.encode('utf8'), safe='')
+                        + '?' + urllib.quote(tenant_id.encode('utf8'),
+                                             safe='')
+                        + '&' + urllib.quote(region.encode('utf8'),
+                                             safe=''))
+                    for dimension_name in dimensions:
+                        url_encoded_serie_name += ('&'
+                            + urllib.quote(
+                                dimension_name.encode('utf8'), safe='')
+                            + '='
+                            + urllib.quote(
+                                dimensions[dimension_name].encode('utf8'),
+                                safe=''))
+                    LOG.debug("url_encoded_serie_name: %s",
+                              url_encoded_serie_name)
+                    data = {"points": [[value, time_stamp]],
+                            "name": url_encoded_serie_name,
+                            "columns": ["value", "time"]}
+                    LOG.debug(data)
+                    json_body.append(data)
+                    count += 1
+                    if count % self.conf.kafka.metrics_batch_size == 0:
+                        influxdb_client.write_points(json_body)
+                        consumer.commit()
+                        last_flush = datetime.now()
+                        count = 0
+                        json_body = []
 except Exception:
             LOG.exception(
@@ -276,5 +346,4 @@ def main_service():
 # Used if run without Openstack service.
 if __name__ == "__main__":
     sys.exit(main())


@@ -18,6 +18,7 @@
 """
 import sys
 from persister import main_service
@@ -26,5 +27,4 @@ def main():
 if __name__ == "__main__":
     sys.exit(main())


@@ -28,4 +28,5 @@ autodoc_index_modules = True
 max-line-length = 120
 [wheel]
 universal = 1


@@ -28,5 +28,5 @@ max-line-length = 120
 # H307 like imports should be grouped together
 # H405 multi line docstring summary not separated with an empty line
 # H904 Wrap long lines in parentheses instead of a backslash
-ignore = F821,H201,H302,H305,H307,H405,H904
+ignore = F821,H201,H302,H305,H307,H405,H904,E126,E125,H306,E302,E122
 exclude=.venv,.git,.tox,dist,*openstack/common*,*egg,build