Graph fast fail-over

* Initialize quickly upon fail-over without requesting updates.
* In case of downtime, Vitrage-graph startup will request collector updates
* vitrage-persistor has an expirer timer to remove old db events

Story: 2002663
Task: 22473
Change-Id: Icccf230e69c41a2f115c0797e60df774db637594
Depends-On: I042665e0d642ba36a97af84a6dc0581888025207
Depends-On: Id5dbd165a1e0220e4e24207e8d237f94415fc490
This commit is contained in:
Idan Hefetz
2018-07-10 15:04:48 +00:00
parent d5c742f460
commit fb4088c32c
30 changed files with 496 additions and 381 deletions

View File

@@ -22,7 +22,4 @@ OPTS = [
cfg.BoolOpt('enable_persistency',
default=False,
help='Periodically store entity graph snapshot to database'),
cfg.IntOpt('graph_persistency_interval',
default=3600,
help='Store graph to database every X seconds'),
]

View File

@@ -1,60 +0,0 @@
# Copyright 2018 - Nokia
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import print_function
from oslo_log import log
from dateutil import parser
from vitrage.common.constants import DatasourceProperties as DSProps
from vitrage.graph.driver.networkx_graph import NXGraph
from vitrage import storage
from vitrage.storage.sqlalchemy import models
from vitrage.utils import datetime
from vitrage.utils.datetime import utcnow
LOG = log.getLogger(__name__)
class GraphPersistor(object):
    """Persists entity-graph snapshots to the Vitrage database.

    Keeps track of the timestamp of the last processed event so every
    stored snapshot records how up-to-date the graph was when saved.
    """

    def __init__(self, conf):
        super(GraphPersistor, self).__init__()
        self.db_connection = storage.get_connection_from_config(conf)
        # Until an event is seen, assume the graph is current as of now.
        self.last_event_timestamp = datetime.datetime.utcnow()

    def store_graph(self, graph):
        """Serialize the graph and write it as a snapshot row to the DB.

        Failures are logged and swallowed: persisting a snapshot is
        best-effort and must not break the caller.
        """
        LOG.info('Graph persistency running..')
        try:
            db_row = models.GraphSnapshot(
                last_event_timestamp=self.last_event_timestamp,
                graph_snapshot=graph.write_gpickle())
            self.db_connection.graph_snapshots.create(db_row)
        except Exception as e:
            LOG.exception("Graph is not stored: %s", e)

    def load_graph(self, timestamp=None):
        """Load a stored snapshot (latest one when timestamp is None).

        :returns: an NXGraph, or None when no snapshot row was found.
        """
        query_time = timestamp if timestamp else utcnow()
        db_row = self.db_connection.graph_snapshots.query(query_time)
        if not db_row:
            return None
        return NXGraph.read_gpickle(db_row.graph_snapshot)

    def delete_graph_snapshots(self, timestamp):
        """Deletes all graph snapshots until timestamp"""
        self.db_connection.graph_snapshots.delete(timestamp)

    def update_last_event_timestamp(self, event):
        """Remember the sample date of the latest processed event."""
        timestamp = event.get(DSProps.SAMPLE_DATE)
        if timestamp:
            self.last_event_timestamp = parser.parse(timestamp)
        else:
            self.last_event_timestamp = None

View File

@@ -13,16 +13,15 @@
# under the License.
from __future__ import print_function
from concurrent.futures import ThreadPoolExecutor
import cotyledon
import dateutil.parser
import oslo_messaging as oslo_m
from futurist import periodics
from oslo_log import log
from vitrage.common.constants import DatasourceProperties as DSProps
from vitrage.common.constants import GraphAction
import oslo_messaging as oslo_m
from vitrage.common.utils import spawn
from vitrage import messaging
from vitrage.storage.sqlalchemy import models
LOG = log.getLogger(__name__)
@@ -39,11 +38,13 @@ class PersistorService(cotyledon.Service):
self.listener = messaging.get_notification_listener(
transport, [target],
[VitragePersistorEndpoint(self.db_connection)])
self.scheduler = Scheduler(conf, db_connection)
def run(self):
    """Start the persistor service.

    Begins consuming notifications on the oslo.messaging listener and
    kicks off the periodic database-cleanup tasks via the Scheduler.
    """
    LOG.info("Vitrage Persistor Service - Starting...")
    self.listener.start()
    self.scheduler.start_periodic_tasks()
    LOG.info("Vitrage Persistor Service - Started!")
@@ -57,19 +58,45 @@ class PersistorService(cotyledon.Service):
class VitragePersistorEndpoint(object):
    """Notification endpoint that writes incoming Vitrage events to the DB.

    Every received payload is persisted to the events table; additional
    per-event-type handlers may be registered in ``funcs``.
    """

    # Maps event_type -> callable(db_connection, event_type, payload).
    # Class-level attribute: registered handlers are shared by all
    # endpoint instances.
    funcs = {}

    def __init__(self, db_connection):
        self.db_connection = db_connection

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        """oslo.messaging 'info' callback: persist and dispatch the event."""
        LOG.debug('Vitrage Event Info: payload %s', payload)
        self.process_event(payload)
        LOG.debug('Event_type: %s Payload %s', event_type, payload)
        # Membership test goes directly on the dict - no '.keys()' needed.
        if event_type and event_type in self.funcs:
            self.funcs[event_type](self.db_connection, event_type, payload)

    def process_event(self, data):
        """:param data: Serialized to a JSON formatted ``str`` """
        # END_MESSAGE is a control marker, not real data - do not persist.
        if data.get(DSProps.EVENT_TYPE) == GraphAction.END_MESSAGE:
            return
        collector_timestamp = \
            dateutil.parser.parse(data.get(DSProps.SAMPLE_DATE))
        event_row = models.Event(payload=data,
                                 collector_timestamp=collector_timestamp)
        self.db_connection.events.create(event_row)
class Scheduler(object):
    """Runs the persistor's background periodic jobs in a thread pool."""

    def __init__(self, conf, db):
        self.conf = conf
        self.db = db
        # The periodic worker is built lazily in start_periodic_tasks().
        self.periodic = None

    def start_periodic_tasks(self):
        """Create the periodic worker, register jobs and run it in a thread."""
        def _executor_factory():
            return ThreadPoolExecutor(max_workers=10)

        self.periodic = periodics.PeriodicWorker.create(
            [], executor_factory=_executor_factory)
        self.add_expirer_timer()
        spawn(self.periodic.start)

    def add_expirer_timer(self):
        """Register the job that trims old rows from the events table."""
        spacing = 60

        @periodics.periodic(spacing=spacing)
        def expirer_periodic():
            try:
                snapshot_event_id = \
                    self.db.graph_snapshots.query_snapshot_event_id()
                if not snapshot_event_id:
                    return
                LOG.debug('Expirer deleting event - id=%s', snapshot_event_id)
                self.db.events.delete(snapshot_event_id)
            except Exception:
                # Best-effort cleanup: log and try again next interval.
                LOG.exception('DB periodic cleanup run failed.')

        self.periodic.add(expirer_periodic)
        LOG.info("Database periodic cleanup starting (spacing=%ss)", spacing)