290 lines
9.6 KiB
Python
290 lines
9.6 KiB
Python
#!/usr/bin/env python
|
|
|
|
# Copyright (c) 2016 Hewlett Packard Enterprise Development Company, L.P.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import abc
|
|
import datetime
|
|
import itertools
|
|
import json
|
|
import logging
|
|
import six
|
|
from six.moves import socketserver
|
|
import threading
|
|
import time
|
|
|
|
from monasca_analytics.source import base
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MarkovChainSource(base.BaseSource):
|
|
"""Base Source of data generated according a Markov chain model.
|
|
|
|
A Markov chain model baked by a finite state machine
|
|
to model more realistic scenario with optional confounding variables.
|
|
This source is useful to see how causality is discovered in the presence
|
|
of confounding variable as well as scenario where some alert appears only
|
|
in a deterministic fashion.
|
|
"""
|
|
|
|
@abc.abstractmethod
|
|
def _create_system(self):
|
|
"""
|
|
Abstract method that should be implemented by subclasses
|
|
|
|
:rtype: list[StateNode]
|
|
:returns: List of StateNode that do not have any dependencies
|
|
"""
|
|
pass
|
|
|
|
def create_dstream(self, ssc):
|
|
"""Initiates the system in a TCP server, and creates the dstream object
|
|
|
|
The _dstream object is created before this source is bound
|
|
to the consumers. It uses a socketTextStream, to read data from
|
|
the ThreadingTCPServer.
|
|
|
|
:type ssc: pyspark.streaming.StreamingContext
|
|
:param ssc: Spark Streaming Context that provides the data input
|
|
"""
|
|
system = LeafNodes(self._create_system())
|
|
port = self._start_thread(system)
|
|
return ssc.socketTextStream(
|
|
"localhost",
|
|
port)
|
|
|
|
def terminate_source(self):
|
|
# This is to allow all the messages
|
|
# being sent by the handler to clear up.
|
|
self._server.terminate = True
|
|
self._server.shutdown()
|
|
self._server.server_close()
|
|
self._server_thread.join()
|
|
|
|
def _start_thread(self, system):
|
|
self._server = socketserver.ThreadingTCPServer(
|
|
("", 0), # Let the OS pick a port for us
|
|
FMSTCPHandler, # Handler of the requests
|
|
False)
|
|
self._server.allow_reuse_address = True
|
|
self._server.server_bind()
|
|
self._server.server_activate()
|
|
self._server.terminate = False
|
|
self._server.system = system
|
|
self._server.sleep_in_seconds = self._config["sleep"]
|
|
if "min_event_per_burst" in self._config:
|
|
self._server.min_event_per_burst = \
|
|
int(self._config["min_event_per_burst"])
|
|
else:
|
|
self._server.min_event_per_burst = 500
|
|
|
|
self._server_thread = threading.Thread(target=self._serve_forever)
|
|
self._server_thread.start()
|
|
port_used = self._server.socket.getsockname()[1]
|
|
return port_used
|
|
|
|
def _serve_forever(self):
|
|
try:
|
|
self._server.serve_forever()
|
|
except IOError:
|
|
logger.debug("Markov chain source's server stopped.")
|
|
|
|
|
|
class MetaId(abc.ABCMeta):
|
|
|
|
def __new__(mcs, name, bases, namespace): # @NoSelf
|
|
cls = super(abc.ABCMeta, mcs).__new__(mcs, name, bases, namespace)
|
|
cls.ids = itertools.count(1)
|
|
return cls
|
|
|
|
|
|
class LeafNodes(object):
|
|
|
|
def __init__(self, state_nodes):
|
|
"""Constructor with list of state nodes
|
|
|
|
:type state_nodes: list[StateNode]
|
|
:param state_nodes: The node of the directed acyclic graph
|
|
that has no dependencies.
|
|
"""
|
|
self._state_nodes = state_nodes
|
|
|
|
def next_state(self, hour_of_day):
|
|
"""Move to next state
|
|
|
|
:type hour_of_day: int
|
|
:param hour_of_day: An hour of the day that is used by
|
|
StateNode.next_state
|
|
"""
|
|
ignored_states = set([])
|
|
for s in self._state_nodes:
|
|
s.next_state(hour_of_day, ignored_states)
|
|
|
|
def collect_events(self, hour_of_day, fake_date, request):
|
|
"""Get list of events
|
|
|
|
:type hour_of_day: int
|
|
:param hour_of_day: An hour of the day that is used by
|
|
StateNode.collect_event
|
|
:type fake_date: datetime.datetime
|
|
:param fake_date: A date that you can use to generate a ctime.
|
|
:type request: RequestBuilder
|
|
:param request: Request object to send data.
|
|
:rtype: list
|
|
:returns: List of event. Specific to the event builder.
|
|
"""
|
|
for node in self._state_nodes:
|
|
node.collect_events(hour_of_day, fake_date, request)
|
|
|
|
|
|
@six.add_metaclass(MetaId)
|
|
class StateNode(object):
|
|
"""This class describes a particular node in the dependency graph.
|
|
|
|
It holds the state information relative to that node and the
|
|
dependencies with the other nodes. The Markov
|
|
It is managed by one instance of a StateDescriptor.
|
|
"""
|
|
|
|
def __init__(self, initial_state, markov_chain, trigger, _id=None):
|
|
"""Constructor
|
|
|
|
:type _id: int | str
|
|
:param _id: Id of the node
|
|
:type initial_state: str | int | None
|
|
:param initial_state: Initial state for this node.
|
|
:type markov_chain:
|
|
monasca_analytics.source.markov_chain.transition.MarkovChain
|
|
:param markov_chain: Markov Chain managing this node state.
|
|
:type trigger:
|
|
list | monasca_analytics.source.markov_chain.events.Trigger
|
|
:param trigger: List of triggers or single trigger that you want
|
|
to attached to this node.
|
|
"""
|
|
if _id is None:
|
|
self._id = next(self.__class__.ids)
|
|
else:
|
|
self._id = _id
|
|
self.state = initial_state
|
|
self._markov_chain = markov_chain
|
|
if type(trigger) == list:
|
|
self._triggers = trigger
|
|
else:
|
|
self._triggers = [trigger]
|
|
self.dependencies = []
|
|
|
|
def id(self):
|
|
"""
|
|
:rtype: int | str
|
|
:returns: Returns this object id.
|
|
"""
|
|
return self._id
|
|
|
|
def next_state(self, hour_of_day, ignored_states):
|
|
"""Move this element to the next state if it is not in the ignored set
|
|
|
|
This will affect this element's children that conditionally depends
|
|
on this element's state.
|
|
|
|
:type hour_of_day: int
|
|
:param hour_of_day: An integer in the range of 0 to 24 to
|
|
express the hour of the day.
|
|
:type ignored_states: set
|
|
:param ignored_states: set of states that should not change.
|
|
"""
|
|
if self._id not in ignored_states:
|
|
ignored_states.add(self._id)
|
|
for dep in self.dependencies:
|
|
dep.next_state(hour_of_day, ignored_states)
|
|
self._markov_chain.apply_on(self, hour_of_day)
|
|
|
|
def collect_events(self, hour_of_day, fake_date, request):
|
|
"""Collect event triggered for the next burst.
|
|
|
|
:type hour_of_day: int
|
|
:param hour_of_day: an integer in the range of 0 to 24 to express
|
|
he hour of the day.
|
|
:type fake_date: datetime.datetime
|
|
:param fake_date: A date that you can use to generate a ctime.
|
|
:type request: RequestBuilder
|
|
:param request: Request builder to send specific events
|
|
:rtype: list
|
|
:returns: events for this step or None
|
|
"""
|
|
for trigger in self._triggers:
|
|
trigger.apply_on(self, hour_of_day, fake_date, request)
|
|
for dep in self.dependencies:
|
|
dep.collect_events(hour_of_day, fake_date, request)
|
|
|
|
|
|
class FMSTCPHandler(socketserver.BaseRequestHandler):
|
|
"""A TCP server handler for the alert generation."""
|
|
|
|
def handle(self):
|
|
"""Handles incoming connection and pushing data into them."""
|
|
|
|
fake_date = datetime.datetime.today()
|
|
hour_of_day = fake_date.hour
|
|
while not self.server.terminate:
|
|
request = RequestBuilder(self.request)
|
|
|
|
# Collect some events
|
|
while request.nb_events() < self.server.min_event_per_burst:
|
|
self.server.system.next_state(hour_of_day)
|
|
self.server.system.collect_events(hour_of_day, fake_date,
|
|
request)
|
|
hour_of_day += 1
|
|
fake_date += datetime.timedelta(hours=1)
|
|
if hour_of_day > 24:
|
|
hour_of_day = 0
|
|
|
|
try:
|
|
request.finalize()
|
|
except IOError:
|
|
logger.debug("Source is now off")
|
|
self.server.terminate = True
|
|
|
|
time.sleep(self.server.sleep_in_seconds)
|
|
|
|
|
|
class RequestBuilder(object):
|
|
|
|
def __init__(self, request):
|
|
self._request = request
|
|
self._collected_data = []
|
|
|
|
def send(self, data):
|
|
"""
|
|
Send an object over the network.
|
|
:param data: Object to send.
|
|
"""
|
|
self._collected_data.append(data)
|
|
|
|
def nb_events(self):
|
|
return len(self._collected_data)
|
|
|
|
def finalize(self):
|
|
for data in self._collected_data:
|
|
self._request.send("{0}\n".format(json.dumps(data,
|
|
cls=DictEncoder)))
|
|
self._request = None
|
|
self._collected_data = None
|
|
|
|
|
|
class DictEncoder(json.JSONEncoder):
|
|
|
|
def default(self, o):
|
|
return o.__dict__
|