Cleaned up a lot of the PEP8 violations

Cleaned up all but four of the PEP8 violation types:
E501 Line length > 80 characters
F401 Module imported but unused
H302 Import only modules (DEPRECATED)
H904 Wrap long lines in parentheses instead of a backslash (DEPRECATED)
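
The bulk of the diff is the H302 conversion: symbol imports become module
imports and call sites are fully qualified. A minimal sketch of the pattern,
using names that appear in the hunks below and assuming an agent_config dict
is already in scope:

# Before (flagged by H302): import the symbol directly
# from monagent.common.util import get_hostname
# hostname = get_hostname(agent_config)

# After: module-only import; the call site names the module
import monagent.common.util
hostname = monagent.common.util.get_hostname(agent_config)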

Change-Id: Id24bff6c5f8b8630a9495f49983324342841866f
gary-hessler 2014-08-21 17:33:29 -06:00
parent d704b17d97
commit 45b156b9fe
91 changed files with 1026 additions and 795 deletions
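
Most of the remaining hunks apply a single docstring convention: the summary
sentence moves up onto the opening triple quotes and ends with a period. A
minimal sketch of the rewrite, modeled on the has_warnings() hunk below:

# Before the cleanup:
# def has_warnings(self):
#     """
#     Check whether the instance run created any warnings
#     """
#     return len(self.warnings) > 0

# After the cleanup:
def has_warnings(self):
    """Check whether the instance run created any warnings.
    """
    return len(self.warnings) > 0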


@ -8,37 +8,38 @@ from __future__ import print_function
import logging
import os
from pprint import pprint
import pprint
import re
import time
import traceback
from monagent.common import check_status
from monagent.common.keystone import Keystone
from monagent.common.config import get_confd_path
from monagent.common.exceptions import CheckException, NaN, Infinity, UnknownValue
from monagent.common.util import LaconicFilter, get_hostname, get_os
import yaml
import monagent.common.aggregator
import monagent.common.config
import monagent.common.exceptions
import monagent.common.keystone
import monagent.common.util
log = logging.getLogger(__name__)
# todo convert all checks to the new interface then remove this.
# Is the LaconicFilter on logs used elsewhere?
# ==============================================================================
# =============================================================================
# DEPRECATED
# ------------------------------
# If you are writing your own check, you should inherit from AgentCheck
# and not this class. This class will be removed in a future version
# of the agent.
# ==============================================================================
# =============================================================================
class Check(object):
"""
(Abstract) class for all checks with the ability to:
"""(Abstract) class for all checks with the ability to:
* store 1 (and only 1) sample for gauges per metric/tag combination
* compute rates for counters
* only log error messages once (instead of each time they occur)
"""
def __init__(self, logger, agent_config=None):
@ -52,13 +53,14 @@ class Check(object):
self._counters = {} # metric_name: bool
self.logger = logger
try:
self.logger.addFilter(LaconicFilter())
self.logger.addFilter(monagent.common.util.LaconicFilter())
except Exception:
self.logger.exception("Trying to install laconic log filter and failed")
@staticmethod
def normalize(metric, prefix=None):
"""Turn a metric into a well-formed metric name
prefix.b.c
"""
name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
@ -81,20 +83,21 @@ class Check(object):
return device_name.strip().lower().replace(' ', '_')
def counter(self, metric):
"""
Treats the metric as a counter, i.e. computes its per second derivative
"""Treats the metric as a counter, i.e. computes its per second derivative
ACHTUNG: Resets previous values associated with this metric.
"""
self._counters[metric] = True
self._sample_store[metric] = {}
def is_counter(self, metric):
"Is this metric a counter?"
"""Is this metric a counter?
"""
return metric in self._counters
def gauge(self, metric):
"""
Treats the metric as a gauge, i.e. keep the data as is
"""Treats the metric as a gauge, i.e. keep the data as is
ACHTUNG: Resets previous values associated with this metric.
"""
self._sample_store[metric] = {}
@ -106,36 +109,36 @@ class Check(object):
return self.is_metric(metric) and not self.is_counter(metric)
def get_metric_names(self):
"Get all metric names"
"""Get all metric names.
"""
return self._sample_store.keys()
def save_gauge(self, metric, value, timestamp=None,
dimensions=None, hostname=None, device_name=None):
""" Save a gauge value. """
"""Save a gauge value.
"""
if not self.is_gauge(metric):
self.gauge(metric)
self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)
def save_sample(self, metric, value, timestamp=None,
dimensions=None, hostname=None, device_name=None):
"""Save a simple sample, evict old values if needed
"""Save a simple sample, evict old values if needed.
"""
if dimensions is None:
dimensions = {}
from common.util import cast_metric_val
if timestamp is None:
timestamp = time.time()
if metric not in self._sample_store:
raise CheckException("Saving a sample for an undefined metric: %s" % metric)
raise monagent.common.exceptions.CheckException("Saving a sample for an undefined metric: %s" % metric)
try:
value = cast_metric_val(value)
value = monagent.common.util.cast_metric_val(value)
except ValueError as ve:
raise NaN(ve)
raise monagent.common.exceptions.NaN(ve)
# Sort and validate dimensions
if dimensions is not None and not isinstance(dimensions, dict):
raise CheckException("Dimensions must be a dictionary")
raise monagent.common.exceptions.CheckException("Dimensions must be a dictionary")
# Data eviction rules
key = (tuple(sorted(dimensions.items())), device_name)
@ -148,8 +151,8 @@ class Check(object):
self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
[(timestamp, value, hostname, device_name)]
else:
raise CheckException("%s must be either gauge or counter, skipping sample at %s" %
(metric, time.ctime(timestamp)))
raise monagent.common.exceptions.CheckException("%s must be either gauge or counter, skipping sample at %s" %
(metric, time.ctime(timestamp)))
if self.is_gauge(metric):
# store[metric][dimensions] = (ts, val) - only 1 value allowed
@ -159,26 +162,27 @@ class Check(object):
@classmethod
def _rate(cls, sample1, sample2):
"Simple rate"
"""Simple rate.
"""
try:
interval = sample2[0] - sample1[0]
if interval == 0:
raise Infinity()
raise monagent.common.exceptions.Infinity()
delta = sample2[1] - sample1[1]
if delta < 0:
raise UnknownValue()
raise monagent.common.exceptions.UnknownValue()
return (sample2[0], delta / interval, sample2[2], sample2[3])
except Infinity:
except monagent.common.exceptions.Infinity:
raise
except UnknownValue:
except monagent.common.exceptions.UnknownValue:
raise
except Exception as e:
raise NaN(e)
raise monagent.common.exceptions.NaN(e)
def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
"""Get (timestamp-epoch-style, value)
"""Get (timestamp-epoch-style, value).
"""
if dimensions is None:
dimensions = {}
@ -188,11 +192,11 @@ class Check(object):
# Never seen this metric
if metric not in self._sample_store:
raise UnknownValue()
raise monagent.common.exceptions.UnknownValue()
# Not enough value to compute rate
elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
raise UnknownValue()
raise monagent.common.exceptions.UnknownValue()
elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
res = self._rate(
@ -205,16 +209,18 @@ class Check(object):
return self._sample_store[metric][key][-1]
else:
raise UnknownValue()
raise monagent.common.exceptions.UnknownValue()
def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
"Return the last value for that metric"
"""Return the last value for that metric.
"""
x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
assert isinstance(x, tuple) and len(x) == 4, x
return x[1]
def get_samples_with_timestamps(self, expire=True):
"Return all values {metric: (ts, value)} for non-tagged metrics"
"""Return all values {metric: (ts, value)} for non-tagged metrics.
"""
values = {}
for m in self._sample_store:
try:
@ -224,7 +230,8 @@ class Check(object):
return values
def get_samples(self, expire=True):
"Return all values {metric: value} for non-tagged metrics"
"""Return all values {metric: value} for non-tagged metrics.
"""
values = {}
for m in self._sample_store:
try:
@ -234,8 +241,9 @@ class Check(object):
pass
return values
def get_metrics(self, expire=True):
def get_metrics(self, expire=True, prettyprint=False):
"""Get all metrics, including the ones that are tagged.
This is the preferred method to retrieve metrics
@return the list of samples
@ -251,7 +259,7 @@ class Check(object):
try:
ts, val, hostname, device_name = self.get_sample_with_timestamp(
m, dimensions, device_name, expire)
except UnknownValue:
except monagent.common.exceptions.UnknownValue:
continue
attributes = {}
if dimensions_list:
@ -273,27 +281,22 @@ class AgentCheck(object):
keystone = None
def __init__(self, name, init_config, agent_config, instances=None):
"""
Initialize a new check.
"""Initialize a new check.
:param name: The name of the check
:param init_config: The config for initializing the check
:param agent_config: The global configuration for the agent
:param instances: A list of configuration objects for each instance.
"""
from monagent.common.aggregator import MetricsAggregator
self.name = name
self.init_config = init_config
self.agent_config = agent_config
self.hostname = get_hostname(agent_config)
self.hostname = monagent.common.util.get_hostname(agent_config)
self.log = logging.getLogger('%s.%s' % (__name__, name))
self.aggregator = MetricsAggregator(
self.hostname,
recent_point_threshold=agent_config.get(
'recent_point_threshold',
None))
self.aggregator = monagent.common.aggregator.MetricsAggregator(self.hostname,
recent_point_threshold=agent_config.get('recent_point_threshold',
None))
self.events = []
self.instances = instances or []
@ -301,20 +304,19 @@ class AgentCheck(object):
self.library_versions = None
api_config = self.agent_config['Api']
AgentCheck.keystone = Keystone(api_config['keystone_url'],
api_config['username'],
api_config['password'],
api_config['project_name'])
AgentCheck.keystone = monagent.common.keystone.Keystone(api_config['keystone_url'],
api_config['username'],
api_config['password'],
api_config['project_name'])
def instance_count(self):
""" Return the number of instances that are configured for this check. """
"""Return the number of instances that are configured for this check.
"""
return len(self.instances)
def gauge(self, metric, value, dimensions=None,
hostname=None, device_name=None, timestamp=None):
"""
Record the value of a gauge, with optional dimensions, hostname and device
name.
"""Record the value of a gauge, with optional dimensions, hostname and device name.
:param metric: The name of the metric
:param value: The value of the gauge
@ -326,8 +328,7 @@ class AgentCheck(object):
self.aggregator.gauge(metric, value, dimensions, hostname, device_name, timestamp)
def increment(self, metric, value=1, dimensions=None, hostname=None, device_name=None):
"""
Increment a counter with optional dimensions, hostname and device name.
"""Increment a counter with optional dimensions, hostname and device name.
:param metric: The name of the metric
:param value: The value to increment by
@ -338,8 +339,7 @@ class AgentCheck(object):
self.aggregator.increment(metric, value, dimensions, hostname, device_name)
def decrement(self, metric, value=-1, dimensions=None, hostname=None, device_name=None):
"""
Increment a counter with optional dimensions, hostname and device name.
"""Decrement a counter with optional dimensions, hostname and device name.
:param metric: The name of the metric
:param value: The value to decrement by
@ -350,8 +350,8 @@ class AgentCheck(object):
self.aggregator.decrement(metric, value, dimensions, hostname, device_name)
def rate(self, metric, value, dimensions=None, hostname=None, device_name=None):
"""
Submit a point for a metric that will be calculated as a rate on flush.
"""Submit a point for a metric that will be calculated as a rate on flush.
Values will persist across each call to `check` if there are not enough
points to generate a rate on the flush.
@ -364,8 +364,7 @@ class AgentCheck(object):
self.aggregator.rate(metric, value, dimensions, hostname, device_name)
def histogram(self, metric, value, dimensions=None, hostname=None, device_name=None):
"""
Sample a histogram value, with optional dimensions, hostname and device name.
"""Sample a histogram value, with optional dimensions, hostname and device name.
:param metric: The name of the metric
:param value: The value to sample for the histogram
@ -376,8 +375,7 @@ class AgentCheck(object):
self.aggregator.histogram(metric, value, dimensions, hostname, device_name)
def set(self, metric, value, dimensions=None, hostname=None, device_name=None):
"""
Sample a set value, with optional dimensions, hostname and device name.
"""Sample a set value, with optional dimensions, hostname and device name.
:param metric: The name of the metric
:param value: The value for the set
@ -388,8 +386,7 @@ class AgentCheck(object):
self.aggregator.set(metric, value, dimensions, hostname, device_name)
def event(self, event):
"""
Save an event.
"""Save an event.
:param event: The event payload as a dictionary. Has the following
structure:
@ -412,8 +409,7 @@ class AgentCheck(object):
self.events.append(event)
def has_events(self):
"""
Check whether the check has saved any events
"""Check whether the check has saved any events
@return whether or not the check has saved any events
@rtype boolean
@ -421,8 +417,7 @@ class AgentCheck(object):
return len(self.events) > 0
def get_metrics(self, prettyprint=False):
"""
Get all metrics, including the ones that are tagged.
"""Get all metrics, including the ones that are tagged.
@return the list of samples
@rtype list of Measurement objects from monagent.common.metrics
@ -444,8 +439,7 @@ class AgentCheck(object):
return self.aggregator.flush()
def get_events(self):
"""
Return a list of the events saved by the check, if any
"""Return a list of the events saved by the check, if any
@return the list of events saved by this check
@rtype list of event dictionaries
@ -455,13 +449,13 @@ class AgentCheck(object):
return events
def has_warnings(self):
"""
Check whether the instance run created any warnings
"""Check whether the instance run created any warnings.
"""
return len(self.warnings) > 0
def warning(self, warning_message):
""" Add a warning message that will be printed in the info page
"""Add a warning message that will be printed in the info page
:param warning_message: String. Warning message to be displayed
"""
self.warnings.append(warning_message)
@ -475,43 +469,45 @@ class AgentCheck(object):
pass
def get_library_versions(self):
""" Should return a string that shows which version
of the needed libraries are used """
"""Should return a string that shows which version
of the needed libraries are used
"""
raise NotImplementedError
def get_warnings(self):
"""
Return the list of warnings messages to be displayed in the info page
"""Return the list of warnings messages to be displayed in the info page.
"""
warnings = self.warnings
self.warnings = []
return warnings
def run(self):
""" Run all instances. """
"""Run all instances.
"""
instance_statuses = []
for i, instance in enumerate(self.instances):
try:
instance['keystone'] = AgentCheck.keystone
self.check(instance)
if self.has_warnings():
instance_status = check_status.InstanceStatus(i,
check_status.STATUS_WARNING,
warnings=self.get_warnings())
instance_status = monagent.common.check_status.InstanceStatus(i,
monagent.common.check_status.STATUS_WARNING,
warnings=self.get_warnings())
else:
instance_status = check_status.InstanceStatus(i, check_status.STATUS_OK)
instance_status = monagent.common.check_status.InstanceStatus(i,
monagent.common.check_status.STATUS_OK)
except Exception as e:
self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
instance_status = check_status.InstanceStatus(i,
check_status.STATUS_ERROR,
error=e,
tb=traceback.format_exc())
instance_status = monagent.common.check_status.InstanceStatus(i,
monagent.common.check_status.STATUS_ERROR,
error=e,
tb=traceback.format_exc())
instance_statuses.append(instance_status)
return instance_statuses
def check(self, instance):
"""
Overriden by the check class. This will be called to run the check.
"""Overriden by the check class. This will be called to run the check.
:param instance: A dict with the instance information. This will vary
depending on your config structure.
@ -520,21 +516,19 @@ class AgentCheck(object):
@staticmethod
def stop():
"""
To be executed when the agent is being stopped to clean ressources
"""To be executed when the agent is being stopped to clean ressources.
"""
pass
@classmethod
def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
"""A method used for testing your check without running the agent.
"""
A method used for testing your check without running the agent.
"""
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
if hasattr(yaml, 'CLoader'):
Loader = yaml.CLoader
else:
Loader = yaml.Loader
if path_to_yaml:
check_name = os.path.basename(path_to_yaml).split('.')[0]
try:
@ -551,9 +545,7 @@ class AgentCheck(object):
@staticmethod
def normalize(metric, prefix=None):
"""
Turn a metric into a well-formed metric name
prefix.b.c
"""Turn a metric into a well-formed metric name prefix.b.c
:param metric The metric name to normalize
:param prefix A prefix to to add to the normalized name, default None
@ -587,10 +579,11 @@ class AgentCheck(object):
def run_check(name, path=None):
from tests.common import get_check
import tests.common
# Read the config file
confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)
confd_path = path or os.path.join(monagent.common.config.get_confd_path(monagent.common.util.get_os()),
'%s.yaml' % name)
try:
f = open(confd_path)
@ -601,13 +594,13 @@ def run_check(name, path=None):
f.close()
# Run the check
check, instances = get_check(name, config_str)
check, instances = tests.common.get_check(name, config_str)
if not instances:
raise Exception('YAML configuration returned no instances.')
for instance in instances:
check.check(instance)
if check.has_events():
print("Events:\n")
pprint(check.get_events(), indent=4)
pprint.pprint(check.get_events(), indent=4)
print("Metrics:\n")
pprint(check.get_metrics(), indent=4)
pprint.pprint(check.get_metrics(), indent=4)


@ -1,15 +1,14 @@
# Core modules
import logging
import threading
import time
import socket
from monagent.common.metrics import Measurement
from monagent.common.util import get_os, Timer
import system.unix as u
import system.win32 as w32
from datadog import Dogstreams
from monagent.common.check_status import CheckStatus, CollectorStatus, EmitterStatus
import threading
import time
import monagent.common.check_status
import monagent.common.metrics
import monagent.common.util
log = logging.getLogger(__name__)
@ -25,15 +24,15 @@ FLUSH_LOGGING_INITIAL = 5
class Collector(object):
"""
The collector is responsible for collecting data from each check and
"""The collector is responsible for collecting data from each check and
passing it along to the emitters, who send it to their final destination.
"""
def __init__(self, agent_config, emitter, checksd=None):
self.emit_duration = None
self.agent_config = agent_config
self.os = get_os()
self.os = monagent.common.util.get_os()
self.plugins = None
self.emitter = emitter
socket.setdefaulttimeout(15)
@ -74,23 +73,24 @@ class Collector(object):
self.init_failed_checks_d = checksd['init_failed_checks']
def _emit(self, payload):
""" Send the payload via the emitter. """
"""Send the payload via the emitter.
"""
statuses = []
# Don't try to send to an emitter if we're stopping.
if self.continue_running:
name = self.emitter.__name__
emitter_status = EmitterStatus(name)
emitter_status = monagent.common.check_status.EmitterStatus(name)
try:
self.emitter(payload, log, self.agent_config['forwarder_url'])
except Exception as e:
log.exception("Error running emitter: %s" % self.emitter.__name__)
emitter_status = EmitterStatus(name, e)
emitter_status = monagent.common.check_status.EmitterStatus(name, e)
statuses.append(emitter_status)
return statuses
def _set_status(self, check_statuses, emitter_statuses, collect_duration):
try:
CollectorStatus(check_statuses, emitter_statuses).persist()
monagent.common.check_status.CollectorStatus(check_statuses, emitter_statuses).persist()
except Exception:
log.exception("Error persisting collector status")
@ -125,13 +125,11 @@ class Collector(object):
return metrics
def run(self):
"""
Collect data from each check and submit their data.
"""Collect data from each check and submit their data.
There are currently two types of checks: the system checks and the configured ones from checks_d.
"""
timer = Timer()
if self.os != 'windows':
cpu_clock = time.clock()
timer = monagent.common.util.Timer()
self.run_count += 1
log.debug("Starting collection run #%s" % self.run_count)
@ -144,7 +142,7 @@ class Collector(object):
for check_type in self._legacy_checks:
try:
for name, value in check_type.check().iteritems():
metrics_list.append(Measurement(name, timestamp, value, {}))
metrics_list.append(monagent.common.metrics.Measurement(name, timestamp, value, {}))
except Exception:
log.exception('Error running check.')
@ -163,10 +161,10 @@ class Collector(object):
# Add in metrics on the collector run, emit_duration is from the previous run
for name, value in self.collector_stats(len(metrics_list), len(events),
collect_duration, self.emit_duration).iteritems():
metrics_list.append(Measurement(name,
timestamp,
value,
{'service': 'monasca', 'component': 'collector'}))
metrics_list.append(monagent.common.metrics.Measurement(name,
timestamp,
value,
{'service': 'monasca', 'component': 'collector'}))
emitter_statuses = self._emit(metrics_list)
self.emit_duration = timer.step()
@ -175,8 +173,9 @@ class Collector(object):
self._set_status(checks_statuses, emitter_statuses, collect_duration)
def run_checks_d(self):
""" Run defined checks_d checks.
returns a list of Measurements, a dictionary of events and a list of check statuses.
"""Run defined checks_d checks.
Returns a list of Measurements, a dictionary of events and a list of check statuses.
"""
measurements = []
events = {}
@ -210,23 +209,22 @@ class Collector(object):
except Exception:
log.exception("Error running check %s" % check.name)
check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count,
library_versions=check.get_library_info())
check_status = monagent.common.check_status.CheckStatus(check.name, instance_statuses, metric_count, event_count,
library_versions=check.get_library_info())
check_statuses.append(check_status)
for check_name, info in self.init_failed_checks_d.iteritems():
if not self.continue_running:
return
check_status = CheckStatus(check_name, None, None, None,
init_failed_error=info['error'],
init_failed_traceback=info['traceback'])
check_status = monagent.common.check_status.CheckStatus(check_name, None, None, None,
init_failed_error=info['error'],
init_failed_traceback=info['traceback'])
check_statuses.append(check_status)
return measurements, events, check_statuses
def stop(self):
"""
Tell the collector to stop at the next logical point.
"""Tell the collector to stop at the next logical point.
"""
# This is called when the process is being killed, so
# try to stop the collector as soon as possible.


@ -1,13 +1,13 @@
import datetime
import itertools
import os
import traceback
import re
import time
from datetime import datetime
from itertools import groupby # >= python 2.4
import traceback
from utils import TailFile
from monagent.common.util import LaconicFilter
from monagent.collector import modules
import monagent.collector
import monagent.common.util
import utils
if hasattr('some string', 'partition'):
@ -111,7 +111,7 @@ class Dogstream(object):
if parser_spec:
try:
parse_func = modules.load(parser_spec, 'parser')
parse_func = monagent.collector.modules.load(parser_spec, 'parser')
if isinstance(parse_func, type):
logger.info('Instantiating class-based dogstream')
parse_func = parse_func(
@ -142,7 +142,7 @@ class Dogstream(object):
self.class_based = class_based
# Apply LaconicFilter to avoid log flooding
self.logger.addFilter(LaconicFilter("dogstream"))
self.logger.addFilter(monagent.common.util.LaconicFilter("dogstream"))
self.log_path = log_path
self.parse_func = parse_func or self._default_line_parser
@ -163,7 +163,7 @@ class Dogstream(object):
# Build our tail -f
if self._gen is None:
self._gen = TailFile(
self._gen = utils.TailFile(
self.logger,
self.log_path,
self._line_parser).tail(
@ -202,7 +202,7 @@ class Dogstream(object):
else:
try:
parsed = self.parse_func(self.logger, line, self.parser_state, *self.parse_args)
except TypeError as e:
except TypeError:
# Arity of parse_func is 3 (old-style), not 4
parsed = self.parse_func(self.logger, line)
@ -250,7 +250,7 @@ class Dogstream(object):
try:
# Bucket points into 15 second buckets
ts = (int(float(ts)) / self._freq) * self._freq
date = datetime.fromtimestamp(ts)
date = datetime.datetime.fromtimestamp(ts)
assert date.year > 1990
except Exception:
invalid_reasons.append('invalid timestamp')
@ -265,14 +265,13 @@ class Dogstream(object):
repr(datum), ', '.join(invalid_reasons), line)
else:
self._values.append((metric, ts, value, attrs))
except Exception as e:
except Exception:
self.logger.debug("Error while parsing line %s" % line, exc_info=True)
self._error_count += 1
self.logger.error("Parser error: %s out of %s" % (self._error_count, self._line_count))
@staticmethod
def _default_line_parser(logger, line):
original_line = line
sep = ' '
metric, _, line = partition(line.strip(), sep)
timestamp, _, line = partition(line.strip(), sep)
@ -284,13 +283,14 @@ class Dogstream(object):
keyval, _, line = partition(line.strip(), sep)
key, val = keyval.split('=', 1)
attributes[key] = val
except Exception as e:
except Exception:
logger.debug(traceback.format_exc())
return metric, timestamp, value, attributes
def _aggregate(self, values):
""" Aggregate values down to the second and store as:
"""Aggregate values down to the second and store as:
{
"dogstream": [(metric, timestamp, value, {key: val})]
}
@ -300,7 +300,7 @@ class Dogstream(object):
values.sort(key=point_sorter)
for (timestamp, metric, host_name, device_name), val_attrs in groupby(values, key=point_sorter):
for (timestamp, metric, host_name, device_name), val_attrs in itertools.groupby(values, key=point_sorter):
attributes = {}
vals = []
for _metric, _timestamp, v, a in val_attrs:
@ -519,6 +519,3 @@ class NagiosServicePerfData(NagiosPerfData):
if middle_name:
metric.append(middle_name.replace(' ', '_').lower())
return metric
if __name__ == '__main__':
testddForwarder()


@ -32,28 +32,34 @@ SENTINEL = "QUIT"
def is_sentinel(obj):
"""Predicate to determine whether an item from the queue is the
signal to stop"""
signal to stop
"""
return isinstance(obj, str) and obj == SENTINEL
class TimeoutError(Exception):
"""Raised when a result is not available within the given timeout"""
"""Raised when a result is not available within the given timeout.
"""
pass
class PoolWorker(threading.Thread):
"""Thread that consumes WorkUnits from a queue to process them"""
"""Thread that consumes WorkUnits from a queue to process them.
"""
def __init__(self, workq, *args, **kwds):
"""\param workq: Queue object to consume the work units from"""
"""\param workq: Queue object to consume the work units from.
"""
threading.Thread.__init__(self, *args, **kwds)
self._workq = workq
self.running = False
def run(self):
"""Process the work unit, or wait for sentinel to exit"""
"""Process the work unit, or wait for sentinel to exit.
"""
while True:
self.running = True
workunit = self._workq.get()
@ -69,14 +75,15 @@ class PoolWorker(threading.Thread):
class Pool(object):
"""
The Pool class represents a pool of worker threads. It has methods
which allows tasks to be offloaded to the worker processes in a
few different ways
The Pool class represents a pool of worker threads.
It has methods which allow tasks to be offloaded to the
worker processes in a few different ways.
"""
def __init__(self, nworkers, name="Pool"):
"""
\param nworkers (integer) number of worker threads to start
"""\param nworkers (integer) number of worker threads to start
\param name (string) prefix for the worker threads' name
"""
self._workq = Queue.Queue()
@ -86,7 +93,7 @@ class Pool(object):
thr = PoolWorker(self._workq, name="Worker-%s-%d" % (name, idx))
try:
thr.start()
except:
except Exception:
# If one thread has a problem, undo everything
self.terminate()
raise
@ -97,25 +104,28 @@ class Pool(object):
return len([w for w in self._workers if w.running])
def apply(self, func, args=(), kwds=None):
"""Equivalent of the apply() builtin function. It blocks till
the result is ready."""
"""Equivalent of the apply() builtin function.
It blocks till the result is ready.
"""
if not kwds:
kwds = dict()
return self.apply_async(func, args, kwds).get()
def map(self, func, iterable, chunksize=None):
"""A parallel equivalent of the map() builtin function. It
blocks till the result is ready.
"""A parallel equivalent of the map() builtin function.
It blocks till the result is ready.
This method chops the iterable into a number of chunks which
it submits to the process pool as separate tasks. The
(approximate) size of these chunks can be specified by setting
chunksize to a positive integer."""
chunksize to a positive integer.
"""
return self.map_async(func, iterable, chunksize).get()
def imap(self, func, iterable, chunksize=1):
"""
An equivalent of itertools.imap().
"""An equivalent of itertools.imap().
The chunksize argument is the same as the one used by the
map() method. For very long iterables using a large value for
@ -133,22 +143,24 @@ class Pool(object):
def imap_unordered(self, func, iterable, chunksize=1):
"""The same as imap() except that the ordering of the results
from the returned iterator should be considered
arbitrary. (Only when there is only one worker process is the
order guaranteed to be "correct".)"""
from the returned iterator should be considered arbitrary.
(Only when there is only one worker process is the order
guaranteed to be "correct".)
"""
collector = UnorderedResultCollector()
self._create_sequences(func, iterable, chunksize, collector)
return iter(collector)
def apply_async(self, func, args=(), kwds=None, callback=None):
"""A variant of the apply() method which returns an
ApplyResult object.
"""A variant of the apply() method which returns an ApplyResult object.
If callback is specified then it should be a callable which
accepts a single argument. When the result becomes ready,
callback is applied to it (unless the call failed). callback
should complete immediately since otherwise the thread which
handles the results will get blocked."""
handles the results will get blocked.
"""
if not kwds:
kwds = dict()
assert not self._closed # No lock here. We assume it's atomic...
@ -158,14 +170,14 @@ class Pool(object):
return apply_result
def map_async(self, func, iterable, chunksize=None, callback=None):
"""A variant of the map() method which returns a ApplyResult
object.
"""A variant of the map() method which returns a ApplyResult object.
If callback is specified then it should be a callable which
accepts a single argument. When the result becomes ready
callback is applied to it (unless the call failed). callback
should complete immediately since otherwise the thread which
handles the results will get blocked."""
handles the results will get blocked.
"""
apply_result = ApplyResult(callback=callback)
collector = OrderedResultCollector(apply_result, as_iterator=False)
self._create_sequences(func, iterable, chunksize, collector)
@ -173,6 +185,7 @@ class Pool(object):
def imap_async(self, func, iterable, chunksize=None, callback=None):
"""A variant of the imap() method which returns an ApplyResult
object that provides an iterator (next method(timeout)
available).
@ -180,7 +193,8 @@ class Pool(object):
accepts a single argument. When the resulting iterator becomes
ready, callback is applied to it (unless the call
failed). callback should complete immediately since otherwise
the thread which handles the results will get blocked."""
the thread which handles the results will get blocked.
"""
apply_result = ApplyResult(callback=callback)
collector = OrderedResultCollector(apply_result, as_iterator=True)
self._create_sequences(func, iterable, chunksize, collector)
@ -189,30 +203,35 @@ class Pool(object):
def imap_unordered_async(self, func, iterable, chunksize=None,
callback=None):
"""A variant of the imap_unordered() method which returns an
ApplyResult object that provides an iterator (next
method(timeout) available).
ApplyResult object that provides an iterator (next method(timeout)
available).
If callback is specified then it should be a callable which
accepts a single argument. When the resulting iterator becomes
ready, callback is applied to it (unless the call
failed). callback should complete immediately since otherwise
the thread which handles the results will get blocked."""
the thread which handles the results will get blocked.
"""
apply_result = ApplyResult(callback=callback)
collector = UnorderedResultCollector(apply_result)
self._create_sequences(func, iterable, chunksize, collector)
return apply_result
def close(self):
"""Prevents any more tasks from being submitted to the
pool. Once all the tasks have been completed the worker
processes will exit."""
"""Prevents any more tasks from being submitted to the pool.
Once all the tasks have been completed the worker
processes will exit.
"""
# No lock here. We assume it's sufficiently atomic...
self._closed = True
def terminate(self):
"""Stops the worker processes immediately without completing
outstanding work. When the pool object is garbage collected
terminate() will be called immediately."""
"""Stops the worker processes immediately without completing outstanding work.
When the pool object is garbage collected terminate() will be called immediately.
"""
self.close()
# Clearing the job queue
@ -228,18 +247,19 @@ class Pool(object):
self._workq.put(SENTINEL)
def join(self):
"""Wait for the worker processes to exit. One must call
close() or terminate() before using join()."""
"""Wait for the worker processes to exit.
One must call close() or terminate() before using join().
"""
for thr in self._workers:
thr.join()
def _create_sequences(self, func, iterable, chunksize, collector=None):
"""
Create the WorkUnit objects to process and pushes them on the
work queue. Each work unit is meant to process a slice of
iterable of size chunksize. If collector is specified, then
the ApplyResult objects associated with the jobs will notify
collector when their result becomes ready.
"""Create the WorkUnit objects to process and pushes them on the work queue.
Each work unit is meant to process a slice of iterable of size chunksize.
If collector is specified, then the ApplyResult objects associated with
the jobs will notify collector when their result becomes ready.
\return the list of WorkUnit objects (basically: JobSequences)
pushed onto the work queue
@ -271,8 +291,10 @@ class Pool(object):
class WorkUnit(object):
"""ABC for a unit of work submitted to the worker threads. It's
basically just an object equipped with a process() method"""
"""ABC for a unit of work submitted to the worker threads.
It's basically just an object equipped with a process() method
"""
def process(self):
"""Do the work. Shouldn't raise any exception"""
@ -281,11 +303,12 @@ class WorkUnit(object):
class Job(WorkUnit):
"""A work unit that corresponds to the execution of a single function"""
"""A work unit that corresponds to the execution of a single function.
"""
def __init__(self, func, args, kwds, apply_result):
"""
\param func/args/kwds used to call the function
"""\param func/args/kwds used to call the function
\param apply_result ApplyResult object that holds the result
of the function call
"""
@ -296,14 +319,14 @@ class Job(WorkUnit):
self._result = apply_result
def process(self):
"""
Call the function with the args/kwds and tell the ApplyResult
"""Call the function with the args/kwds and tell the ApplyResult
that its result is ready. Correctly handles the exceptions
happening during the execution of the function
happening during the execution of the function.
"""
try:
result = self._func(*self._args, **self._kwds)
except:
except Exception:
self._result._set_exception()
else:
self._result._set_value(result)
@ -312,15 +335,15 @@ class Job(WorkUnit):
class JobSequence(WorkUnit):
"""A work unit that corresponds to the processing of a continuous
sequence of Job objects"""
sequence of Job objects
"""
def __init__(self, jobs):
WorkUnit.__init__(self)
self._jobs = jobs
def process(self):
"""
Call process() on all the Job objects that have been specified
"""Call process() on all the Job objects that have been specified.
"""
for job in self._jobs:
job.process()
@ -329,16 +352,18 @@ class JobSequence(WorkUnit):
class ApplyResult(object):
"""An object associated with a Job object that holds its result:
it's available during the whole life the Job and after, even when
the Job didn't process yet. It's possible to use this object to
wait for the result/exception of the job to be available.
The result objects returned by the Pool::*_async() methods are of
this type"""
this type
"""
def __init__(self, collector=None, callback=None):
"""
\param collector when not None, the notify_ready() method of
"""\param collector when not None, the notify_ready() method of
the collector will be called when the result from the Job is
ready
\param callback when not None, function to call when the
@ -356,11 +381,11 @@ class ApplyResult(object):
self._collector = collector
def get(self, timeout=None):
"""
Returns the result when it arrives. If timeout is not None and
the result does not arrive within timeout seconds then
TimeoutError is raised. If the remote call raised an exception
then that exception will be reraised by get().
"""Returns the result when it arrives.
If timeout is not None and the result does not arrive within timeout
seconds then TimeoutError is raised. If the remote call raised an
exception then that exception will be re-raised by get().
"""
if not self.wait(timeout):
raise TimeoutError("Result not available within %fs" % timeout)
@ -369,27 +394,31 @@ class ApplyResult(object):
raise self._data[0], self._data[1], self._data[2]
def wait(self, timeout=None):
"""Waits until the result is available or until timeout
seconds pass."""
"""Waits until the result is available or until timeout seconds pass.
"""
self._event.wait(timeout)
return self._event.isSet()
def ready(self):
"""Returns whether the call has completed."""
"""Returns whether the call has completed.
"""
return self._event.isSet()
def successful(self):
"""Returns whether the call completed without raising an
exception. Will raise AssertionError if the result is not
ready."""
"""Returns whether the call completed without raising an exception.
Will raise AssertionError if the result is not ready.
"""
assert self.ready()
return self._success
def _set_value(self, value):
"""Called by a Job object to tell the result is ready, and
provides the value of this result. The object will become
ready and successful. The collector's notify_ready() method
will be called, and the callback method too"""
will be called, and the callback method too.
"""
assert not self.ready()
self._data = value
self._success = True
@ -399,14 +428,16 @@ class ApplyResult(object):
if self._callback is not None:
try:
self._callback(value)
except:
except Exception:
traceback.print_exc()
def _set_exception(self):
"""Called by a Job object to tell that an exception occured
during the processing of the function. The object will become
ready but not successful. The collector's notify_ready()
method will be called, but NOT the callback method"""
method will be called, but NOT the callback method
"""
assert not self.ready()
self._data = sys.exc_info()
self._success = False
@ -417,22 +448,25 @@ class ApplyResult(object):
class AbstractResultCollector(object):
"""ABC to define the interface of a ResultCollector object. It is
basically an object which knows whuich results it's waiting for,
"""ABC to define the interface of a ResultCollector object.
It is basically an object which knows which results it's waiting for,
and which is able to get notified when they become available. It is
also able to provide an iterator over the results when they are
available"""
available.
"""
def __init__(self, to_notify):
"""
\param to_notify ApplyResult object to notify when all the
"""\param to_notify ApplyResult object to notify when all the
results we're waiting for become available. Can be None.
"""
self._to_notify = to_notify
def register_result(self, apply_result):
"""Used to identify which results we're waiting for. Will
always be called BEFORE the Jobs get submitted to the work
"""Used to identify which results we're waiting for.
Will always be called BEFORE the Jobs get submitted to the work
queue, and BEFORE the __iter__ and _get_result() methods can
be called
\param apply_result ApplyResult object to add in our collection
@ -441,6 +475,7 @@ class AbstractResultCollector(object):
def notify_ready(self, apply_result):
"""Called by the ApplyResult object (already registered via
register_result()) that it is now ready (ie. the Job's result
is available or an exception has been raised).
\param apply_result ApplyResult object telling us that the job
@ -450,6 +485,7 @@ class AbstractResultCollector(object):
def _get_result(self, idx, timeout=None):
"""Called by the CollectorIterator object to retrieve the
result's values one after another (order defined by the
implementation)
\param idx The index of the result we want, wrt collector's order
@ -460,19 +496,23 @@ class AbstractResultCollector(object):
raise NotImplementedError("Children classes must implement it")
def __iter__(self):
"""Return a new CollectorIterator object for this collector"""
"""Return a new CollectorIterator object for this collector.
"""
return CollectorIterator(self)
class CollectorIterator(object):
"""An iterator that allows to iterate over the result values
available in the given collector object. Equipped with an extended
next() method accepting a timeout argument. Created by the
AbstractResultCollector::__iter__() method"""
AbstractResultCollector::__iter__() method
"""
def __init__(self, collector):
"""\param AbstractResultCollector instance"""
"""\param AbstractResultCollector instance.
"""
self._collector = collector
self._idx = 0
@ -480,16 +520,18 @@ class CollectorIterator(object):
return self
def next(self, timeout=None):
"""Return the next result value in the sequence. Raise
StopIteration at the end. Can raise the exception raised by
the Job"""
"""Return the next result value in the sequence.
Raise StopIteration at the end. Can raise the exception raised by
the Job.
"""
try:
apply_result = self._collector._get_result(self._idx, timeout)
except IndexError:
# Reset for next time
self._idx = 0
raise StopIteration
except:
except Exception:
self._idx = 0
raise
self._idx += 1
@ -500,13 +542,15 @@ class CollectorIterator(object):
class UnorderedResultCollector(AbstractResultCollector):
"""An AbstractResultCollector implementation that collects the
values of the ApplyResult objects in the order they become ready. The
CollectorIterator object returned by __iter__() will iterate over
them in the order they become ready"""
them in the order they become ready.
"""
def __init__(self, to_notify=None):
"""
\param to_notify ApplyResult object to notify when all the
"""\param to_notify ApplyResult object to notify when all the
results we're waiting for become available. Can be None.
"""
AbstractResultCollector.__init__(self, to_notify)
@ -515,8 +559,9 @@ class UnorderedResultCollector(AbstractResultCollector):
self._expected = 0
def register_result(self, apply_result):
"""Used to identify which results we're waiting for. Will
always be called BEFORE the Jobs get submitted to the work
"""Used to identify which results we're waiting for.
Will always be called BEFORE the Jobs get submitted to the work
queue, and BEFORE the __iter__ and _get_result() methods can
be called
\param apply_result ApplyResult object to add in our collection
@ -525,6 +570,7 @@ class UnorderedResultCollector(AbstractResultCollector):
def _get_result(self, idx, timeout=None):
"""Called by the CollectorIterator object to retrieve the
result's values one after another, in the order the results have
become available.
\param idx The index of the result we want, wrt collector's order
@ -553,6 +599,7 @@ class UnorderedResultCollector(AbstractResultCollector):
def notify_ready(self, apply_result):
"""Called by the ApplyResult object (already registered via
register_result()) that it is now ready (ie. the Job's result
is available or an exception has been raised).
\param apply_result ApplyResult object telling us that the job
@ -575,13 +622,15 @@ class UnorderedResultCollector(AbstractResultCollector):
class OrderedResultCollector(AbstractResultCollector):
"""An AbstractResultCollector implementation that collects the
values of the ApplyResult objects in the order they have been
submitted. The CollectorIterator object returned by __iter__()
will iterate over them in the order they have been submitted"""
will iterate over them in the order they have been submitted.
"""
def __init__(self, to_notify=None, as_iterator=True):
"""
\param to_notify ApplyResult object to notify when all the
"""\param to_notify ApplyResult object to notify when all the
results we're waiting for become available. Can be None.
\param as_iterator boolean telling whether the result value
set on to_notify should be an iterator (available as soon as 1
@ -595,8 +644,9 @@ class OrderedResultCollector(AbstractResultCollector):
self._as_iterator = as_iterator
def register_result(self, apply_result):
"""Used to identify which results we're waiting for. Will
always be called BEFORE the Jobs get submitted to the work
"""Used to identify which results we're waiting for.
Will always be called BEFORE the Jobs get submitted to the work
queue, and BEFORE the __iter__ and _get_result() methods can
be called
\param apply_result ApplyResult object to add in our collection
@ -606,6 +656,7 @@ class OrderedResultCollector(AbstractResultCollector):
def _get_result(self, idx, timeout=None):
"""Called by the CollectorIterator object to retrieve the
result's values one after another (order defined by the
implementation)
\param idx The index of the result we want, wrt collector's order
@ -619,6 +670,7 @@ class OrderedResultCollector(AbstractResultCollector):
def notify_ready(self, apply_result):
"""Called by the ApplyResult object (already registered via
register_result()) that it is now ready (ie. the Job's result
is available or an exception has been raised).
\param apply_result ApplyResult object telling us that the job
@ -641,25 +693,25 @@ class OrderedResultCollector(AbstractResultCollector):
elif not self._as_iterator and got_last:
try:
lst = [r.get(0) for r in self._results]
except:
except Exception:
self._to_notify._set_exception()
else:
self._to_notify._set_value(lst)
def _test():
"""Some tests"""
import thread
"""Some tests.
"""
import time
def f(x):
return x * x
def work(seconds):
print("[%d] Start to work for %fs..." % (thread.get_ident(), seconds))
print("[%d] Start to work for %fs..." % (threading.thread.get_ident(), seconds))
time.sleep(seconds)
print("[%d] Work done (%fs)." % (thread.get_ident(), seconds))
return "%d slept %fs" % (thread.get_ident(), seconds)
print("[%d] Work done (%fs)." % (threading.thread.get_ident(), seconds))
return "%d slept %fs" % (threading.thread.get_ident(), seconds)
# Test copy/pasted from multiprocessing
pool = Pool(9) # start 9 worker threads


@ -1,10 +1,10 @@
from collections import namedtuple
import time
from Queue import Queue, Empty
import collections
import Queue
import threading
import time
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.libs.thread_pool import Pool
import monagent.collector.checks
import monagent.collector.checks.libs.thread_pool
TIMEOUT = 180
@ -12,16 +12,16 @@ DEFAULT_SIZE_POOL = 6
MAX_LOOP_ITERATIONS = 1000
FAILURE = "FAILURE"
up_down = namedtuple('up_down', ['UP', 'DOWN'])
up_down = collections.namedtuple('up_down', ['UP', 'DOWN'])
Status = up_down('UP', 'DOWN')
EventType = up_down("servicecheck.state_change.up", "servicecheck.state_change.down")
class ServicesCheck(AgentCheck):
class ServicesCheck(monagent.collector.checks.AgentCheck):
SOURCE_TYPE_NAME = 'servicecheck'
"""
Services checks inherits from this class.
"""Services checks inherits from this class.
This class should never be directly instanciated.
Work flow:
@ -36,11 +36,10 @@ class ServicesCheck(AgentCheck):
Status.UP or Status.DOWN.
The second element is a short error message that will be displayed
when the service turns down.
"""
def __init__(self, name, init_config, agentConfig, instances):
AgentCheck.__init__(self, name, init_config, agentConfig, instances)
monagent.collector.checks.AgentCheck.__init__(self, name, init_config, agentConfig, instances)
# A dictionary to keep track of service statuses
self.statuses = {}
@ -60,9 +59,9 @@ class ServicesCheck(AgentCheck):
default_size = min(self.instance_count(), DEFAULT_SIZE_POOL)
self.pool_size = int(self.init_config.get('threads_count', default_size))
self.pool = Pool(self.pool_size)
self.pool = monagent.collector.checks.libs.thread_pool.Pool(self.pool_size)
self.resultsq = Queue()
self.resultsq = Queue.Queue()
self.jobs_status = {}
self.pool_started = True
@ -110,7 +109,7 @@ class ServicesCheck(AgentCheck):
# We put the results in the result queue
self.resultsq.put(result)
except Exception as e:
except Exception:
result = (FAILURE, FAILURE, FAILURE, FAILURE)
self.resultsq.put(result)
@ -119,7 +118,7 @@ class ServicesCheck(AgentCheck):
try:
# We want to fetch the result in a non blocking way
status, msg, name, queue_instance = self.resultsq.get_nowait()
except Empty:
except Queue.Empty:
break
if status == FAILURE:
@ -165,13 +164,12 @@ class ServicesCheck(AgentCheck):
del self.jobs_status[name]
def _check(self, instance):
"""This function should be implemented by inherited classes"""
"""This function should be implemented by inherited classes.
"""
raise NotImplementedError
def _clean(self):
now = time.time()
stuck_process = None
stuck_time = time.time()
for name in self.jobs_status.keys():
start_time = self.jobs_status[name]
if now - start_time > TIMEOUT:


@ -1,8 +1,8 @@
"""
Unix system checks.
"""Unix system checks.
"""
# stdlib
import functools
import logging
import operator
import platform
@ -12,22 +12,24 @@ import sys
import time
# project
from monagent.collector.checks.check import Check
from monagent.common.metrics import Measurement
from monagent.common.util import Platform
from functools import reduce
import monagent.collector.checks.check
import monagent.common.metrics
import monagent.common.util
# locale-resilient float converter
to_float = lambda s: float(s.replace(",", "."))
class Disk(Check):
class Disk(monagent.collector.checks.check.Check):
""" Collects metrics about the machine's disks. """
"""Collects metrics about the machine's disks.
"""
def check(self):
"""Get disk space/inode stats"""
"""Get disk space/inode stats.
"""
# First get the configuration.
if self.agent_config is not None:
use_mount = self.agent_config.get("use_mount", False)
@ -58,12 +60,12 @@ class Disk(Check):
# parse into a list of Measurements
stats.update(inodes)
timestamp = time.time()
measurements = [
Measurement(
key.split(
'.', 1)[1], timestamp, value, {
'device': key.split(
'.', 1)[0]}) for key, value in stats.iteritems()]
measurements = [monagent.common.metrics.Measurement(key.split('.', 1)[1],
timestamp,
value,
{'device': key.split('.', 1)[0]})
for key, value in stats.iteritems()]
return measurements
except Exception:
@ -72,9 +74,9 @@ class Disk(Check):
def parse_df_output(
self, df_output, platform_name, inodes=False, use_mount=False, blacklist_re=None):
"""
Parse the output of the df command. If use_volume is true the volume
is used to anchor the metric, otherwise false the mount
"""Parse the output of the df command.
If use_mount is true the mount point is used to anchor the metric; otherwise the volume
is used. Returns a tuple of (disk, inode).
"""
usage_data = {}
@ -88,14 +90,14 @@ class Disk(Check):
if use_mount:
parts[0] = parts[-1]
if inodes:
if Platform.is_darwin(platform_name):
if monagent.common.util.Platform.is_darwin(platform_name):
# Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted
# Inodes are in position 5, 6 and we need to compute the total
# Total
parts[1] = int(parts[5]) + int(parts[6]) # Total
parts[2] = int(parts[5]) # Used
parts[3] = int(parts[6]) # Available
elif Platform.is_freebsd(platform_name):
elif monagent.common.util.Platform.is_freebsd(platform_name):
# Filesystem 1K-blocks Used Avail Capacity iused ifree %iused Mounted
# Inodes are in position 5, 6 and we need to compute the total
parts[1] = int(parts[5]) + int(parts[6]) # Total
@ -128,8 +130,7 @@ class Disk(Check):
return True
def _is_real_device(self, device):
"""
Return true if we should track the given device name and false otherwise.
"""Return true if we should track the given device name and false otherwise.
"""
# First, skip empty lines.
if not device or len(device) <= 1:
@ -164,10 +165,9 @@ class Disk(Check):
return devices
def _transform_df_output(self, df_output, blacklist_re):
"""
Given raw output for the df command, transform it into a normalized
list devices. A 'device' is a list with fields corresponding to the
output of df output on each platform.
"""Given raw output for the df command, transform it into a normalized list devices.
A 'device' is a list with fields corresponding to the output of df output on each platform.
"""
all_devices = [l.strip().split() for l in df_output.split("\n")]
@ -190,10 +190,10 @@ class Disk(Check):
return devices
class IO(Check):
class IO(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.header_re = re.compile(r'([%\\/\-_a-zA-Z0-9]+)[\s+]?')
self.item_re = re.compile(r'^([a-zA-Z0-9\/]+)')
self.value_re = re.compile(r'\d+\.\d+')
@ -252,7 +252,8 @@ class IO(Check):
@staticmethod
def xlate(metric_name, os_name):
"""Standardize on linux metric names"""
"""Standardize on linux metric names.
"""
if os_name == "sunos":
names = {"wait": "await",
"svc_t": "svctm",
@ -282,7 +283,7 @@ class IO(Check):
"""
io = {}
try:
if Platform.is_linux():
if monagent.common.util.Platform.is_linux():
stdout = sp.Popen(['iostat', '-d', '1', '2', '-x', '-k'],
stdout=sp.PIPE,
close_fds=True).communicate()[0]
@ -394,7 +395,7 @@ class IO(Check):
for dev_name, stats in filtered_io.iteritems():
filtered_stats = {stat: stats[stat]
for stat in stats.iterkeys() if stat not in self.stat_blacklist}
m_list = [Measurement(key, timestamp, value, {'device': dev_name})
m_list = [monagent.common.metrics.Measurement(key, timestamp, value, {'device': dev_name})
for key, value in filtered_stats.iteritems()]
measurements.extend(m_list)
@ -405,10 +406,10 @@ class IO(Check):
return {}
class Load(Check):
class Load(monagent.collector.checks.check.Check):
def check(self):
if Platform.is_linux():
if monagent.common.util.Platform.is_linux():
try:
loadAvrgProc = open('/proc/loadavg', 'r')
uptime = loadAvrgProc.readlines()
@ -437,10 +438,10 @@ class Load(Check):
}
class Memory(Check):
class Memory(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
macV = None
if sys.platform == 'darwin':
macV = platform.mac_ver()
@ -464,7 +465,7 @@ class Memory(Check):
pass
def check(self):
if Platform.is_linux():
if monagent.common.util.Platform.is_linux():
try:
meminfoProc = open('/proc/meminfo', 'r')
lines = meminfoProc.readlines()
@ -736,10 +737,11 @@ class Memory(Check):
return {}
class Cpu(Check):
class Cpu(monagent.collector.checks.check.Check):
def check(self):
"""Return an aggregate of CPU stats across all CPUs
"""Return an aggregate of CPU stats across all CPUs.
When figures are not available, False is sent back.
"""
def format_results(us, sy, wa, idle, st):
@ -754,7 +756,8 @@ class Cpu(Check):
return data
def get_value(legend, data, name, filter_value=None):
"Using the legend and a metric name, get the value or None from the data line"
"""Using the legend and a metric name, get the value or None from the data line.
"""
if name in legend:
value = to_float(data[legend.index(name)])
if filter_value is not None:
@ -767,7 +770,7 @@ class Cpu(Check):
self.logger.debug("Cannot extract cpu value %s from %s (%s)" % (name, data, legend))
return 0.0
if Platform.is_linux():
if monagent.common.util.Platform.is_linux():
mpstat = sp.Popen(['mpstat', '1', '3'], stdout=sp.PIPE, close_fds=True).communicate()[0]
# topdog@ip:~$ mpstat 1 3
# Linux 2.6.32-341-ec2 (ip) 01/19/2012 _x86_64_ (2 CPU)
@ -917,7 +920,7 @@ class Cpu(Check):
size = [get_value(headers, l.split(), "sze") for l in d_lines]
count = sum(size)
rel_size = [s / count for s in size]
dot = lambda v1, v2: reduce(operator.add, map(operator.mul, v1, v2))
dot = lambda v1, v2: functools.reduce(operator.add, map(operator.mul, v1, v2))
return format_results(dot(user, rel_size),
dot(kern, rel_size),
dot(wait, rel_size),
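The hunk above swaps the Python 2 builtin reduce for functools.reduce, which also exists on Python 3. A minimal sketch of the weighted dot product it computes (the sample vectors are hypothetical):

import functools
import operator

def dot(v1, v2):
    # Element-wise multiply, then sum the products.
    return functools.reduce(operator.add, map(operator.mul, v1, v2))

rel_size = [0.25, 0.75]     # hypothetical relative CPU-set sizes
user = [10.0, 20.0]         # hypothetical per-set user% samples
print(dot(user, rel_size))  # 17.5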
@ -932,9 +935,9 @@ class Cpu(Check):
def _get_subprocess_output(command):
"""
Run the given subprocess command and return it's output. Raise an Exception
if an error occurs.
"""Run the given subprocess command and return it's output.
Raise an Exception if an error occurs.
"""
proc = sp.Popen(command, stdout=sp.PIPE, close_fds=True)
return proc.stdout.read()
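Many hunks in this commit apply the H302 rule from the commit message: import modules, then qualify names at the call site. A stdlib-only sketch of the before/after shape (the monagent imports themselves need the package installed, so they appear only in comments):

# Before: from monagent.common.util import Platform; Platform.is_linux()
# After:  import monagent.common.util
#         monagent.common.util.Platform.is_linux()
# The same rule, demonstrated with a standard-library module:
import os.path

print(os.path.join('etc', 'monasca', 'agent.conf'))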

View File

@ -1,4 +1,4 @@
from monagent.collector.checks.check import Check
import monagent.collector.checks.check
try:
import wmi
@ -15,10 +15,10 @@ B2MB = float(1048576)
KB2MB = B2KB = float(1024)
class Processes(Check):
class Processes(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.gauge('system.proc.queue_length')
self.gauge('system.proc.count')
@ -31,7 +31,7 @@ class Processes(Check):
return
try:
cpu = w.Win32_PerfFormattedData_PerfOS_Processor(name="_Total")[0]
w.Win32_PerfFormattedData_PerfOS_Processor(name="_Total")[0]
except AttributeError:
self.logger.info('Missing Win32_PerfFormattedData_PerfOS_Processor WMI class.' +
' No process metrics will be returned.')
@ -44,10 +44,10 @@ class Processes(Check):
return self.get_metrics()
class Memory(Check):
class Memory(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.mem.free')
self.gauge('system.mem.used')
@ -84,10 +84,10 @@ class Memory(Check):
return self.get_metrics()
class Cpu(Check):
class Cpu(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.cpu.user')
self.gauge('system.cpu.idle')
@ -122,9 +122,10 @@ class Cpu(Check):
@staticmethod
def _average_metric(wmi_class, wmi_prop):
''' Sum all of the values of a metric from a WMI class object, excluding
the value for "_Total"
'''
"""Sum all of the values of a metric from a WMI class object.
Excludes the value for "_Total"
"""
val = 0
counter = 0
for wmi_object in wmi_class:
@ -142,10 +143,10 @@ class Cpu(Check):
return val
class Network(Check):
class Network(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.net.bytes_rcvd')
self.gauge('system.net.bytes_sent')
@ -169,10 +170,10 @@ class Network(Check):
return self.get_metrics()
class Disk(Check):
class Disk(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.disk.free')
self.gauge('system.disk.total')
@ -203,10 +204,10 @@ class Disk(Check):
return self.get_metrics()
class IO(Check):
class IO(monagent.collector.checks.check.Check):
def __init__(self, logger):
Check.__init__(self, logger)
monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.io.wkb_s')
self.gauge('system.io.w_s')

View File

@ -1,12 +1,11 @@
import base64
import binascii
import os
import stat
# os.SEEK_END is defined in python 2.5
SEEK_END = 2
from stat import *
import binascii
def median(vals):
vals = sorted(vals)
@ -21,8 +20,9 @@ def median(vals):
def add_basic_auth(request, username, password):
""" A helper to add basic authentication to a urllib2 request. We do this
across a variety of checks so it's good to have this in one place.
"""A helper to add basic authentication to a urllib2 request.
We do this across a variety of checks so it's good to have this in one place.
"""
auth_str = base64.encodestring('%s:%s' % (username, password)).strip()
request.add_header('Authorization', 'Basic %s' % auth_str)
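A runnable sketch of what the helper produces; the credentials are hypothetical and no request is actually sent:

import base64
import urllib2

request = urllib2.Request('http://localhost:8080/status')
auth_str = base64.encodestring('%s:%s' % ('guest', 'secret')).strip()
request.add_header('Authorization', 'Basic %s' % auth_str)
print(request.get_header('Authorization'))  # Basic Z3Vlc3Q6c2VjcmV0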
@ -52,8 +52,8 @@ class TailFile(object):
already_open = True
stat = os.stat(self._path)
inode = stat[ST_INO]
size = stat[ST_SIZE]
inode = stat[stat.ST_INO]
size = stat[stat.ST_SIZE]
# Compute CRC of the beginning of the file
crc = None
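Note that the new lines above rebind stat to the os.stat() result, which shadows the stat module the sorted imports just brought in, so stat[stat.ST_INO] would raise at runtime. A sketch of the unambiguous spelling, using a distinct local name:

from __future__ import print_function

import os
import stat  # the module supplies the ST_INO / ST_SIZE tuple indices

st = os.stat(__file__)  # bind the result to a name other than `stat`
inode = st[stat.ST_INO]
size = st[stat.ST_SIZE]
print(inode, size)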
@ -98,8 +98,10 @@ class TailFile(object):
def tail(self, line_by_line=True, move_end=True):
"""Read line-by-line and run callback on each line.
line_by_line: yield each time a callback has returned True
move_end: start from the last line of the log"""
move_end: start from the last line of the log
"""
try:
self._open_file(move_end=move_end)

View File

@ -1,8 +1,8 @@
import urllib2
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
class Apache(AgentCheck):

View File

@ -1,7 +1,7 @@
from collections import namedtuple
from fnmatch import fnmatch
import os
import time
from collections import namedtuple
from monagent.collector.checks import AgentCheck
@ -52,16 +52,16 @@ class Cacti(AgentCheck):
# The rrdtool module is required for the check to work
try:
import rrdtool
except ImportError as e:
except ImportError:
raise Exception(
"Cannot import rrdtool module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/cacti")
"Cannot import rrdtool module. This module is required for the cacti plugin to work correctly")
# Try importing MySQL
try:
import MySQLdb
except ImportError as e:
except ImportError:
raise Exception(
"Cannot import MySQLdb module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/cacti")
"Cannot import MySQLdb module. This module is required for the cacti plugin to work correctly")
connection = MySQLdb.connect(config.host, config.user, config.password, config.db)
@ -123,7 +123,9 @@ class Cacti(AgentCheck):
return Config(host, user, password, db, rrd_path, whitelist, field_names)
def _read_rrd(self, rrd_path, hostname, device_name):
''' Main metric fetching method '''
"""Main metric fetching method.
"""
import rrdtool
metric_count = 0
@ -177,9 +179,10 @@ class Cacti(AgentCheck):
return metric_count
def _fetch_rrd_meta(self, connection, rrd_path_root, whitelist, field_names):
''' Fetch metadata about each RRD in this Cacti DB, returning a list of
tuples of (hostname, device_name, rrd_path)
'''
"""Fetch metadata about each RRD in this Cacti DB.
Returns a list of tuples of (hostname, device_name, rrd_path)
"""
def _in_whitelist(rrd):
path = rrd.replace('<path_rra>/', '')
for p in whitelist:
@ -226,7 +229,9 @@ class Cacti(AgentCheck):
@staticmethod
def _format_metric_name(m_name, cfunc):
''' Format a cacti metric name into a Datadog-friendly name '''
"""Format a cacti metric name into a Datadog-friendly name.
"""
try:
aggr = CFUNC_TO_AGGR[cfunc]
except KeyError:
@ -242,16 +247,18 @@ class Cacti(AgentCheck):
@staticmethod
def _transform_metric(m_name, val):
''' Add any special case transformations here '''
"""Add any special case transformations here.
"""
# Report memory in MB
if m_name[0:11] in ('system.mem.', 'system.disk'):
return val / 1024
return val
'''
For backwards compatability with pre-checks_d configuration.
Convert old-style config to new-style config.
'''
"""For backwards compatability with pre-checks_d configuration.
Convert old-style config to new-style config.
"""
@staticmethod
def parse_agent_config(agentConfig):
required = ['cacti_mysql_server', 'cacti_mysql_user', 'cacti_rrd_path']

View File

@ -1,13 +1,14 @@
import urllib2
import json
import urllib2
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.common.util import headers
class CouchDb(AgentCheck):
"""Extracts stats from CouchDB via its REST API
http://wiki.apache.org/couchdb/Runtime_Statistics
"""
@ -28,7 +29,9 @@ class CouchDb(AgentCheck):
self.gauge(metric_name, val, dimensions=metric_dimensions, device_name=db_name)
def _get_stats(self, url):
"Hit a given URL and return the parsed json"
"""Hit a given URL and return the parsed json.
"""
self.log.debug('Fetching Couchdb stats at url: %s' % url)
req = urllib2.Request(url, None, headers(self.agent_config))
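A self-contained sketch of the "return the parsed json" step, with a canned CouchDB-style payload standing in for the HTTP response body:

import json

body = '{"couchdb": {"request_time": {"current": 1.5, "count": 4}}}'
stats = json.loads(body)  # what _get_stats hands back to the check
print(stats['couchdb']['request_time']['current'])  # 1.5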

View File

@ -3,9 +3,9 @@ import urllib2
import re
import sys
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
# Constants
@ -16,6 +16,7 @@ DEFAULT_TIMEOUT = 10
class Couchbase(AgentCheck):
"""Extracts stats from Couchbase via its REST API
http://docs.couchbase.com/couchbase-manual-2.0/#using-the-rest-api
"""
@ -49,7 +50,9 @@ class Couchbase(AgentCheck):
metric_name, val, dimensions=metric_dimensions, device_name=node_name)
def _get_stats(self, url, instance):
"Hit a given URL and return the parsed json"
"""Hit a given URL and return the parsed json.
"""
self.log.debug('Fetching Couchbase stats at url: %s' % url)
req = urllib2.Request(url, None, headers(self.agent_config))
if 'user' in instance and 'password' in instance:

View File

@ -1,6 +1,9 @@
from fnmatch import fnmatch
from os import stat, walk
from os.path import abspath, exists, join
from os import stat
from os import walk
from os.path import abspath
from os.path import exists
from os.path import join
import time
from monagent.collector.checks import AgentCheck

View File

@ -1,10 +1,10 @@
import json
import urllib2
import urllib
import httplib
import socket
import json
import os
import re
import socket
import urllib2
import urllib
from urlparse import urlsplit
from monagent.collector.checks import AgentCheck
@ -70,7 +70,9 @@ DOCKER_TAGS = [
class UnixHTTPConnection(httplib.HTTPConnection, object):
"""Class used in conjuction with UnixSocketHandler to make urllib2
compatible with Unix sockets."""
compatible with Unix sockets.
"""
def __init__(self, unix_socket):
self._unix_socket = unix_socket
@ -87,8 +89,9 @@ class UnixHTTPConnection(httplib.HTTPConnection, object):
class UnixSocketHandler(urllib2.AbstractHTTPHandler):
"""Class that makes Unix sockets work with urllib2 without any additional
dependencies."""
"""Class that makes Unix sockets work with urllib2 without any additional dependencies.
"""
def unix_open(self, req):
full_path = "%s%s" % urlsplit(req.get_full_url())[1:3]
@ -180,15 +183,21 @@ class Docker(AgentCheck):
return False
def _get_containers(self, instance):
"""Gets the list of running containers in Docker."""
"""Gets the list of running containers in Docker.
"""
return self._get_json("%(url)s/containers/json" % instance, params={"size": 1})
def _get_container(self, instance, cid):
"""Get container information from Docker, gived a container Id."""
"""Get container information from Docker, gived a container Id.
"""
return self._get_json("%s/containers/%s/json" % (instance["url"], cid))
def _get_json(self, uri, params=None):
"""Utility method to get and parse JSON streams."""
"""Utility method to get and parse JSON streams.
"""
if params:
uri = "%s?%s" % (uri, urllib.urlencode(params))
self.log.debug("Connecting to: %s" % uri)
@ -205,8 +214,10 @@ class Docker(AgentCheck):
@staticmethod
def _find_cgroup(hierarchy):
"""Finds the mount point for a specified cgroup hierarchy. Works with
old style and new style mounts."""
"""Finds the mount point for a specified cgroup hierarchy.
Works with old style and new style mounts.
"""
try:
fp = open("/proc/mounts")
mounts = map(lambda x: x.split(), fp.read().splitlines())
@ -221,7 +232,9 @@ class Docker(AgentCheck):
return mountpoint
def _parse_cgroup_file(self, file_):
"""Parses a cgroup pseudo file for key/values."""
"""Parses a cgroup pseudo file for key/values.
"""
fp = None
try:
self.log.debug("Opening file: %s" % file_)

View File

@ -3,12 +3,12 @@ import socket
import subprocess
import sys
import time
import urlparse
import urllib2
import urlparse
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
class NodeNotFound(Exception):
@ -154,8 +154,8 @@ class ElasticSearch(AgentCheck):
self._process_health_data(config_url, health_data, dimensions=dimensions)
def _get_es_version(self, config_url, auth=None):
"""
Get the running version of Elastic Search
"""Get the running version of Elastic Search.
"""
try:
@ -170,8 +170,8 @@ class ElasticSearch(AgentCheck):
return version
def _define_params(self, version):
"""
Define the set of URLs and METRICS to use depending on the running ES version
"""Define the set of URLs and METRICS to use depending on the running ES version.
"""
if version >= [0, 90, 10]:
@ -214,8 +214,9 @@ class ElasticSearch(AgentCheck):
self.METRICS.update(additional_metrics)
def _get_data(self, url, auth=None):
""" Hit a given URL and return the parsed json
`auth` is a tuple of (username, password) or None
"""Hit a given URL and return the parsed json
`auth` is a tuple of (username, password) or None
"""
req = urllib2.Request(url, None, headers(self.agent_config))
if auth:
@ -264,8 +265,9 @@ class ElasticSearch(AgentCheck):
process_metric(metric, *desc)
def _get_primary_addr(self, url, node_name, auth):
""" Returns a list of primary interface addresses as seen by ES.
Used in ES < 0.19
"""Returns a list of primary interface addresses as seen by ES.
Used in ES < 0.19
"""
req = urllib2.Request(url, None, headers(self.agent_config))
# Load basic authentication configuration, if available.
@ -286,9 +288,10 @@ class ElasticSearch(AgentCheck):
@staticmethod
def _host_matches_node(primary_addrs):
""" For < 0.19, check if the current host matches the IP given in the
cluster nodes check `/_cluster/nodes`. Uses `ip addr` on Linux and
`ifconfig` on Mac
"""For < 0.19, check if the current host matches the IP given in the
cluster nodes check `/_cluster/nodes`. Uses `ip addr` on Linux and
`ifconfig` on Mac
"""
if sys.platform == 'darwin':
ifaces = subprocess.Popen(['ifconfig'], stdout=subprocess.PIPE)
@ -312,6 +315,7 @@ class ElasticSearch(AgentCheck):
def _process_metric(self, data, metric, path, xform=None, dimensions=None):
"""data: dictionary containing all the stats
metric: datadog metric
path: corresponding path in data, flattened, e.g. thread_pool.bulk.queue
xform: a lambda to apply to the numerical value

View File

@ -1,5 +1,4 @@
"""
Collects metrics from the gunicorn web server.
"""Collects metrics from the gunicorn web server.
http://gunicorn.org/
"""
@ -42,7 +41,9 @@ class GUnicornCheck(AgentCheck):
return {"psutil": version}
def check(self, instance):
""" Collect metrics for the given gunicorn instance. """
"""Collect metrics for the given gunicorn instance.
"""
if not psutil:
raise GUnicornCheckError("gunicorn check requires the psutil python package")
@ -107,7 +108,9 @@ class GUnicornCheck(AgentCheck):
@staticmethod
def _get_master_proc_by_name(name):
""" Return a psutil process for the master gunicorn process with the given name. """
"""Return a psutil process for the master gunicorn process with the given name.
"""
master_name = GUnicornCheck._get_master_proc_name(name)
master_procs = [
p for p in psutil.process_iter() if p.cmdline and p.cmdline[0] == master_name]
@ -121,7 +124,9 @@ class GUnicornCheck(AgentCheck):
@staticmethod
def _get_master_proc_name(name):
""" Return the name of the master gunicorn process for the given proc name. """
"""Return the name of the master gunicorn process for the given proc name.
"""
# Here's an example of a process list for a gunicorn box with name web1
# root 22976 0.1 0.1 60364 13424 ? Ss 19:30 0:00 gunicorn: master [web1]
# web 22984 20.7 2.3 521924 176136 ? Sl 19:30 1:58 gunicorn: worker [web1]

View File

@ -1,6 +1,6 @@
import urllib2
import time
from collections import defaultdict
import time
import urllib2
from monagent.collector.checks import AgentCheck
from monagent.common.util import headers
@ -65,7 +65,9 @@ class HAProxy(AgentCheck):
url=url, collect_status_metrics=collect_status_metrics)
def _fetch_data(self, url, username, password):
''' Hit a given URL and return the parsed json '''
"""Hit a given URL and return the parsed json.
"""
# Try to fetch data from the stats URL
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
@ -85,8 +87,10 @@ class HAProxy(AgentCheck):
def _process_data(self, data, collect_aggregates_only, process_events,
url=None, collect_status_metrics=False):
''' Main data-processing loop. For each piece of useful data, we'll
either save a metric, save an event or both. '''
"""Main data-processing loop. For each piece of useful data, we'll
either save a metric, save an event or both.
"""
# Split the first line into an index of fields
# The line looks like:
@ -164,8 +168,8 @@ class HAProxy(AgentCheck):
def _process_metrics(self, data_list, service, url):
for data in data_list:
"""
Each element of data_list is a dictionary related to one host
"""Each element of data_list is a dictionary related to one host
(one line) extracted from the csv. All of these elements should
have the same value for the 'pxname' key
It should look like:
@ -193,8 +197,9 @@ class HAProxy(AgentCheck):
self.gauge(name, value, dimensions=dimensions)
def _process_events(self, data_list, url):
''' Main event processing loop. Events will be created for a service
status change '''
"""Main event processing loop. Events will be created for a service status change.
"""
for data in data_list:
hostname = data['svname']
service_name = data['pxname']

View File

@ -4,6 +4,7 @@ from monagent.collector.checks import AgentCheck
class HDFSCheck(AgentCheck):
"""Report on free space and space used in HDFS.
"""
def check(self, instance):

View File

@ -1,22 +1,29 @@
#!/bin/env python
"""Monitoring Agent remote host aliveness checker"""
"""Monitoring Agent remote host aliveness checker.
"""
import socket
import subprocess
import sys
from monagent.collector.checks.services_checks import ServicesCheck, Status
from monagent.collector.checks.services_checks import ServicesCheck
from monagent.collector.checks.services_checks import Status
class HostAlive(ServicesCheck):
"""Inherit ServicesCheck class to test if a host is alive or not"""
"""Inherit ServicesCheck class to test if a host is alive or not.
"""
def __init__(self, name, init_config, agent_config, instances=None):
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
def _test_ssh(self, host, port, timeout=None):
""" Connect to the SSH port (typically 22) and look for a banner """
"""Connect to the SSH port (typically 22) and look for a banner.
"""
if port is None:
port = 22
try:
@ -46,7 +53,9 @@ class HostAlive(ServicesCheck):
@staticmethod
def _test_ping(host, timeout=None):
""" Attempt to ping the host """
"""Attempt to ping the host.
"""
ping_prefix = "ping -c 1 -q "
if timeout is not None:
ping_prefix += "-W " + str(timeout) + " "
@ -70,11 +79,15 @@ class HostAlive(ServicesCheck):
return True
def _create_status_event(self, status, msg, instance):
"""Does nothing: status events are not yet supported by Mon API"""
"""Does nothing: status events are not yet supported by Mon API.
"""
return
def _check(self, instance):
"""Run the desired host-alive check againt this host"""
"""Run the desired host-alive check againt this host.
"""
dimensions = {'target_host': instance['host_name'], 'observer_host': socket.getfqdn()}
# Add per-instance dimensions, if any

View File

@ -1,15 +1,20 @@
#!/bin/env python
"""Monitoring Agent plugin for HTTP/API checks"""
"""Monitoring Agent plugin for HTTP/API checks.
"""
import socket
import time
import json
import re
import socket
import time
from httplib2 import Http, HttpLib2Error, httplib
from httplib2 import Http
from httplib2 import httplib
from httplib2 import HttpLib2Error
from monagent.collector.checks.services_checks import ServicesCheck, Status
from monagent.collector.checks.check import AgentCheck
from monagent.collector.checks.services_checks import ServicesCheck
from monagent.collector.checks.services_checks import Status
class HTTPCheck(ServicesCheck):
@ -38,7 +43,9 @@ class HTTPCheck(ServicesCheck):
return url, username, password, timeout, include_content, headers, response_time, dimensions, ssl, pattern, use_keystone, token
def _create_status_event(self, status, msg, instance):
"""Does nothing: status events are not yet supported by Mon API"""
"""Does nothing: status events are not yet supported by Mon API.
"""
return
def _check(self, instance):

View File

@ -1,10 +1,8 @@
from collections import defaultdict
from glob import glob
import os
import time
from collections import defaultdict
from glob import glob
try:
from xml.etree.ElementTree import ElementTree
except ImportError:
@ -13,14 +11,14 @@ except ImportError:
except ImportError:
pass
from monagent.common.util import get_hostname
from monagent.collector.checks import AgentCheck
from monagent.common.util import get_hostname
class Skip(Exception):
"""
Raised by :class:`Jenkins` when it comes across
"""Raised by :class:`Jenkins` when it comes across
a build or job that should be excluded from being checked.
"""

View File

@ -1,13 +1,12 @@
from collections import defaultdict
import sys
import random
if sys.version_info < (2, 6):
# Normally we'd write our checks to be compatible with >= python 2.4 but
# the dependencies of this check are not compatible with 2.4 and would
# be too much work to rewrite, so raise an exception here.
raise Exception('kafka_consumer check requires at least Python 2.6')
from collections import defaultdict
from monagent.collector.checks import AgentCheck
try:
from kafka.client import KafkaClient
from kafka.common import OffsetRequest
@ -18,7 +17,8 @@ try:
from kazoo.exceptions import NoNodeError
except ImportError:
raise Exception('Missing python dependency: kazoo (https://github.com/python-zk/kazoo)')
import random
from monagent.collector.checks import AgentCheck
class KafkaCheck(AgentCheck):

View File

@ -1,6 +1,6 @@
from collections import defaultdict
import re
import urllib2
from collections import defaultdict
from monagent.collector.checks import AgentCheck
@ -12,6 +12,7 @@ whitespace = re.compile(r'\s')
class KyotoTycoonCheck(AgentCheck):
"""Report statistics about the Kyoto Tycoon DBM-style
database server (http://fallabs.com/kyototycoon/)
"""

View File

@ -1,8 +1,8 @@
import urllib2
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
class Lighttpd(AgentCheck):

View File

@ -1,4 +1,4 @@
from monagent.collector.checks import *
from monagent.collector.checks import AgentCheck
# Reference: http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt
# Name Type Meaning

View File

@ -1,6 +1,6 @@
import re
import types
import time
import types
from monagent.collector.checks import AgentCheck
from monagent.common.util import get_hostname
@ -109,7 +109,9 @@ class MongoDb(AgentCheck):
def create_event(self, state, server, agentConfig):
"""Create an event with a message describing the replication
state of a mongo node"""
state of a mongo node
"""
def get_state_description(state):
if state == 0:
@ -148,8 +150,8 @@ class MongoDb(AgentCheck):
})
def check(self, instance):
"""
Returns a dictionary that looks a lot like what's sent back by db.serverStatus()
"""Returns a dictionary that looks a lot like what's sent back by db.serverStatus().
"""
if 'server' not in instance:
self.log.warn("Missing 'server' in mongo config")

View File

@ -1,7 +1,7 @@
import subprocess
import os
import sys
import re
import subprocess
import sys
import traceback
from monagent.collector.checks import AgentCheck
@ -235,8 +235,8 @@ class MySql(AgentCheck):
return the_type(dict[key])
def _collect_dict(self, metric_type, field_metric_map, query, db, dimensions):
"""
Query status and get a dictionary back.
"""Query status and get a dictionary back.
Extract each field out of the dictionary
and stuff it in the corresponding metric.

View File

@ -1,5 +1,7 @@
#!/bin/env python
"""Monitoring Agent wrapper for Nagios checks"""
"""Monitoring Agent wrapper for Nagios checks.
"""
import hashlib
import json
@ -14,16 +16,20 @@ from monagent.collector.checks.services_checks import ServicesCheck, Status
class WrapNagios(ServicesCheck):
"""Inherit ServicesCheck class to process Nagios checks"""
"""Inherit ServicesCheck class to process Nagios checks.
"""
def __init__(self, name, init_config, agent_config, instances=None):
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
@staticmethod
def _do_skip_check(instance, last_run_data):
""" Determine whether or not to skip a check depending on
the checks's check_interval, if specified, and the last
time the check was run """
"""Determine whether or not to skip a check depending on
the check's check_interval, if specified, and the last
time the check was run
"""
if instance['service_name'] in last_run_data and 'check_interval' in instance:
if time.time() < last_run_data[instance['service_name']] + instance['check_interval']:
return True
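The interval gate in _do_skip_check, restated as a runnable sketch with hypothetical state:

import time

last_run_data = {'http_status': time.time() - 30}  # ran 30s ago
instance = {'service_name': 'http_status', 'check_interval': 60}

if instance['service_name'] in last_run_data and 'check_interval' in instance:
    skip = time.time() < (last_run_data[instance['service_name']]
                          + instance['check_interval'])
    print(skip)  # True: only half of the 60s interval has elapsed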
@ -31,11 +37,15 @@ class WrapNagios(ServicesCheck):
return False
def _create_status_event(self, status, msg, instance):
"""Does nothing: status events are not yet supported by Mon API"""
"""Does nothing: status events are not yet supported by Mon API.
"""
return
def _check(self, instance):
"""Run the command specified by check_command and capture the result"""
"""Run the command specified by check_command and capture the result.
"""
dimensions = {'observer_host': socket.getfqdn()}
# Add per-instance dimensions, if any

View File

@ -1,10 +1,10 @@
"""
Collects network metrics.
"""Collects network metrics.
"""
# stdlib
import subprocess
import re
import subprocess
# project
from monagent.collector.checks import AgentCheck
@ -254,8 +254,8 @@ class Network(AgentCheck):
self._submit_devicemetrics(interface, metrics)
def _parse_solaris_netstat(self, netstat_output):
"""
Return a mapping of network metrics by interface. For example:
"""Return a mapping of network metrics by interface. For example:
{ interface:
{'bytes_out': 0,
'bytes_in': 0,

View File

@ -1,14 +1,15 @@
import re
import urllib2
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
class Nginx(AgentCheck):
"""Tracks basic nginx metrics via the status module
* number of connections
* number of requests per second
@ -20,7 +21,6 @@ class Nginx(AgentCheck):
server accepts handled requests
1156958 1156958 4491319
Reading: 0 Writing: 2 Waiting: 6
"""
def check(self, instance):

View File

@ -8,7 +8,8 @@ class ShouldRestartException(Exception):
class PostgreSql(AgentCheck):
"""Collects per-database, and optionally per-relation metrics
"""Collects per-database, and optionally per-relation metrics.
"""
RATE = AgentCheck.rate
@ -120,6 +121,7 @@ SELECT relname,
def _collect_stats(self, key, db, dimensions, relations):
"""Query pg_stat_* for various metrics
If relations is not an empty list, gather per-relation metrics
on top of that.
"""
@ -190,7 +192,9 @@ SELECT relname,
[v[0][1](self, v[0][0], v[1], dimensions=dimensions) for v in values]
def get_connection(self, key, host, port, user, password, dbname, use_cached=True):
"Get and memoize connections to instances"
"""Get and memorize connections to instances.
"""
if key in self.dbs and use_cached:
return self.dbs[key]

View File

@ -1,4 +1,6 @@
"""Gather metrics on specific processes"""
"""Gather metrics on specific processes.
"""
from monagent.collector.checks import AgentCheck
from monagent.common.util import Platform
@ -28,8 +30,8 @@ class ProcessCheck(AgentCheck):
return False
def find_pids(self, search_string, psutil, exact_match=True):
"""
Create a set of pids of selected processes.
"""Create a set of pids of selected processes.
Search for search_string
"""
found_process_list = []

View File

@ -1,7 +1,7 @@
import json
import time
import urllib2
import urlparse
import time
from monagent.collector.checks import AgentCheck
@ -42,6 +42,7 @@ METRIC_SUFFIX = {QUEUE_TYPE: "queue", NODE_TYPE: "node"}
class RabbitMQ(AgentCheck):
"""This check is for gathering statistics from the RabbitMQ
Management Plugin (http://www.rabbitmq.com/management.html)
"""
@ -104,8 +105,8 @@ class RabbitMQ(AgentCheck):
return data
def get_stats(self, instance, base_url, object_type, max_detailed, specified_list):
"""
instance: the check instance
"""instance: the check instance
base_url: the url of the rabbitmq management api (e.g. http://localhost:15672/api)
object_type: either QUEUE_TYPE or NODE_TYPE
max_detailed: the limit of objects to collect for this type
@ -116,7 +117,8 @@ class RabbitMQ(AgentCheck):
# Make a copy of this list as we will remove items from it at each iteration
specified_items = list(specified_list)
""" data is a list of nodes or queues:
"""data is a list of nodes or queues:
data = [
{'status': 'running', 'node': 'rabbit@host', 'name': 'queue1', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},
{'status': 'running', 'node': 'rabbit@host', 'name': 'queue10', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},

View File

@ -1,6 +1,6 @@
'''
Redis checks
'''
"""Redis checks.
"""
import re
import time
@ -84,7 +84,9 @@ class Redis(AgentCheck):
return {"redis": version}
def _parse_dict_string(self, string, key, default):
"""Take from a more recent redis.py, parse_info"""
"""Take from a more recent redis.py, parse_info.
"""
try:
for item in string.split(','):
k, v = item.rsplit('=', 1)
@ -94,7 +96,7 @@ class Redis(AgentCheck):
except ValueError:
return v
return default
except Exception as e:
except Exception:
self.log.exception("Cannot parse dictionary string: %s" % string)
return default
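The k=v,k=v parse above, extracted into a standalone sketch; the db0-style sample string is hypothetical:

def parse_dict_string(string, key, default):
    for item in string.split(','):
        k, v = item.rsplit('=', 1)
        if k == key:
            try:
                return int(v)
            except ValueError:
                return v
    return default

print(parse_dict_string('keys=123,expires=5', 'keys', 0))  # 123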
@ -144,7 +146,7 @@ class Redis(AgentCheck):
start = time.time()
try:
info = conn.info()
except ValueError as e:
except ValueError:
# This is likely a known issue with redis library 2.0.0
# See https://github.com/DataDog/dd-agent/issues/374 for details
import redis

View File

@ -1,9 +1,9 @@
from hashlib import md5
from httplib2 import Http
from httplib2 import HttpLib2Error
import json
import time
import socket
from httplib2 import Http, HttpLib2Error
import time
from monagent.collector.checks import AgentCheck
@ -60,15 +60,15 @@ class Riak(AgentCheck):
h = Http(timeout=timeout)
resp, content = h.request(url, "GET")
except socket.timeout as e:
except socket.timeout:
self.timeout_event(url, timeout, aggregation_key)
return
except socket.error as e:
except socket.error:
self.timeout_event(url, timeout, aggregation_key)
return
except HttpLib2Error as e:
except HttpLib2Error:
self.timeout_event(url, timeout, aggregation_key)
return

View File

@ -1,6 +1,6 @@
'''
Check the performance counters from SQL Server
'''
"""Check the performance counters from SQL Server.
"""
import traceback
from monagent.collector.checks import AgentCheck
@ -39,14 +39,16 @@ class SQLServer(AgentCheck):
@staticmethod
def _conn_key(host, username, password, database):
''' Return a key to use for the connection cache
'''
"""Return a key to use for the connection cache.
"""
return '%s:%s:%s:%s' % (host, username, password, database)
@staticmethod
def _conn_string(host, username, password, database):
''' Return a connection string to use with adodbapi
'''
"""Return a connection string to use with adodbapi.
"""
conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (host, database)
if username:
conn_str += 'User ID=%s;' % (username)
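A hedged sketch of the resulting adodbapi connection string; host, catalog, and user are hypothetical:

host, database, username = 'sqlhost', 'master', 'monasca'

conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (
    host, database)
if username:
    conn_str += 'User ID=%s;' % username
print(conn_str)
# Provider=SQLOLEDB;Data Source=sqlhost;Initial Catalog=master;User ID=monasca;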
@ -74,7 +76,7 @@ class SQLServer(AgentCheck):
conn_str = self._conn_string(host, username, password, database)
conn = adodbapi.connect(conn_str)
self.connections[conn_key] = conn
except Exception as e:
except Exception:
cx = "%s - %s" % (host, database)
raise Exception("Unable to connect to SQL Server for instance %s.\n %s"
% (cx, traceback.format_exc()))
@ -84,8 +86,9 @@ class SQLServer(AgentCheck):
self._fetch_metrics(cursor, dimensions)
def _fetch_metrics(self, cursor, custom_dimensions):
''' Fetch the metrics from the sys.dm_os_performance_counters table
'''
"""Fetch the metrics from the sys.dm_os_performance_counters table.
"""
for metric in self.METRICS:
# Normalize all rows to the same size for ease of use
if len(metric) == 3:
@ -100,7 +103,7 @@ class SQLServer(AgentCheck):
if instance_n == ALL_INSTANCES:
try:
self._fetch_all_instances(metric, cursor, custom_dimensions)
except Exception as e:
except Exception:
self.log.exception('Unable to fetch metric: %s' % mname)
self.warning('Unable to fetch metric: %s' % mname)
else:
@ -112,7 +115,7 @@ class SQLServer(AgentCheck):
and instance_name = ?
""", (counter, instance_n))
(value,) = cursor.fetchone()
except Exception as e:
except Exception:
self.log.exception('Unable to fetch metric: %s' % mname)
self.warning('Unable to fetch metric: %s' % mname)
continue

View File

@ -1,7 +1,9 @@
import socket
import time
from monagent.collector.checks.services_checks import ServicesCheck, Status, EventType
from monagent.collector.checks.services_checks import EventType
from monagent.collector.checks.services_checks import ServicesCheck
from monagent.collector.checks.services_checks import Status
class BadConfException(Exception):

View File

@ -1,6 +1,6 @@
import xml.parsers.expat # python 2.4 compatible
import re
import subprocess
import xml.parsers.expat # python 2.4 compatible
from monagent.collector.checks import AgentCheck

View File

@ -1,8 +1,8 @@
'''
Monitor the Windows Event Log
'''
from datetime import datetime
"""Monitor the Windows Event Log.
"""
import calendar
from datetime import datetime
try:
import wmi
except Exception:
@ -75,9 +75,10 @@ class Win32EventLog(AgentCheck):
@staticmethod
def _instance_key(instance):
''' Generate a unique key per instance for use with keeping track of
state for each instance.
'''
"""Generate a unique key per instance for use with keeping track of
state for each instance.
"""
return '%s' % (instance)
@ -95,7 +96,9 @@ class EventLogQuery(object):
self.start_ts = start_ts
def to_wql(self):
''' Return this query as a WQL string. '''
"""Return this query as a WQL string.
"""
wql = """
SELECT Message, SourceName, TimeGenerated, Type, User, InsertionStrings
FROM Win32_NTLogEvent
@ -125,9 +128,10 @@ class EventLogQuery(object):
@staticmethod
def _add_message_filter(msg_filter, q):
''' Filter on the message text using a LIKE query. If the filter starts
with '-' then we'll assume that it's a NOT LIKE filter.
'''
"""Filter on the message text using a LIKE query. If the filter starts
with '-' then we'll assume that it's a NOT LIKE filter.
"""
if msg_filter.startswith('-'):
msg_filter = msg_filter[1:]
q += '\nAND NOT Message LIKE "%s"' % msg_filter
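The filter logic as a standalone sketch; the non-negated branch is assumed symmetric with the negated one shown above:

def add_message_filter(msg_filter, q):
    if msg_filter.startswith('-'):
        # A leading '-' marks a NOT LIKE filter.
        q += '\nAND NOT Message LIKE "%s"' % msg_filter[1:]
    else:
        q += '\nAND Message LIKE "%s"' % msg_filter
    return q

print(add_message_filter('-%heartbeat%', 'SELECT * FROM Win32_NTLogEvent'))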
@ -137,18 +141,19 @@ class EventLogQuery(object):
@staticmethod
def _dt_to_wmi(dt):
''' A wrapper around wmi.from_time to get a WMI-formatted time from a
time struct.
'''
"""A wrapper around wmi.from_time to get a WMI-formatted time from a time struct.
"""
return wmi.from_time(year=dt.year, month=dt.month, day=dt.day,
hours=dt.hour, minutes=dt.minute, seconds=dt.second, microseconds=0,
timezone=0)
@staticmethod
def _convert_event_types(types):
''' Detect if we are running on <= Server 2003. If so, we should convert
"""Detect if we are running on <= Server 2003. If so, we should convert
the EventType values to integers
'''
"""
return types
@ -177,15 +182,18 @@ class LogEvent(object):
}
def is_after(self, ts):
''' Compare this event's timestamp to a give timestamp. '''
"""Compare this event's timestamp to a give timestamp.
"""
if self.timestamp >= int(calendar.timegm(ts.timetuple())):
return True
return False
@staticmethod
def _wmi_to_ts(wmi_ts):
''' Convert a wmi formatted timestamp into an epoch using wmi.to_time().
'''
"""Convert a wmi formatted timestamp into an epoch using wmi.to_time().
"""
year, month, day, hour, minute, second, microsecond, tz = \
wmi.to_time(wmi_ts)
dt = datetime(year=year, month=month, day=day, hour=hour, minute=minute,

View File

@ -1,10 +1,9 @@
'''
Windows Only.
"""Windows Only.
Generic WMI check. This check allows you to specify particular metrics that you
want from WMI in your configuration. Check wmi.yaml.example in your conf.d
directory for more details on configuration.
'''
"""
try:
import wmi
except Exception:

View File

@ -1,5 +1,4 @@
'''
Parses the response from zookeeper's `stat` admin command, which looks like:
"""Parses the response from zookeeper's `stat` admin command, which looks like:
```
Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT
@ -21,13 +20,12 @@ Node count: 487
```
Tested with Zookeeper versions 3.0.0 to 3.4.5
'''
"""
import re
import socket
import struct
from StringIO import StringIO
import struct
from monagent.collector.checks import AgentCheck
@ -87,9 +85,10 @@ class Zookeeper(AgentCheck):
@classmethod
def parse_stat(cls, buf):
''' `buf` is a readable file-like object
returns a tuple: ([(metric_name, value)], dimensions)
'''
"""`buf` is a readable file-like object
returns a tuple: ([(metric_name, value)], dimensions)
"""
metrics = []
buf.seek(0)

View File

@ -1,19 +1,25 @@
#!/usr/bin/env python
# set up logging before importing any other components
from monagent.common.config import get_version, initialize_logging
initialize_logging('collector')
import os
os.umask(0o22)
# Core modules
import glob
import logging
import os.path
import os
import signal
import sys
import time
import glob
# Custom modules
import checks.collector
import jmxfetch
import monagent.common.check_status
import monagent.common.config
import monagent.common.daemon
import monagent.common.emitter
import monagent.common.util
# set up logging before importing any other components
monagent.common.config.initialize_logging('collector')
os.umask(0o22)
# Check we're not using an old version of Python. We need 2.4 or above because
# some modules (like subprocess) were only introduced in 2.4.
@ -21,16 +27,6 @@ if int(sys.version_info[1]) <= 3:
sys.stderr.write("Monasca Agent requires python 2.4 or later.\n")
sys.exit(2)
# Custom modules
from checks.collector import Collector
from monagent.common.check_status import CollectorStatus, ForwarderStatus
from monagent.common.config import get_config, get_parsed_args, load_check_directory, get_confd_path, check_yaml, get_logging_config
from monagent.common.daemon import Daemon, AgentSupervisor
from monagent.common.emitter import http_emitter
from monagent.common.util import Watchdog, PidFile, get_os
from jmxfetch import JMXFetch, JMX_LIST_COMMANDS
# Constants
PID_NAME = "monasca-agent"
WATCHDOG_MULTIPLIER = 10
@ -43,14 +39,14 @@ log = logging.getLogger('collector')
# todo the collector has daemon code but is always run in foreground mode
# from the supervisor, is there a reason for the daemon code then?
class CollectorDaemon(Daemon):
class CollectorDaemon(monagent.common.daemon.Daemon):
"""The agent class is a daemon that runs the collector in a background process.
"""
The agent class is a daemon that runs the collector in a background process.
"""
def __init__(self, pidfile, autorestart, start_event=True):
Daemon.__init__(self, pidfile, autorestart=autorestart)
monagent.common.daemon.Daemon.__init__(self, pidfile, autorestart=autorestart)
self.run_forever = True
self.collector = None
self.start_event = start_event
@ -59,8 +55,8 @@ class CollectorDaemon(Daemon):
log.debug("Caught sigterm. Stopping run loop.")
self.run_forever = False
if JMXFetch.is_running():
JMXFetch.stop()
if jmxfetch.JMXFetch.is_running():
jmxfetch.JMXFetch.stop()
if self.collector:
self.collector.stop()
@ -72,10 +68,12 @@ class CollectorDaemon(Daemon):
def info(self, verbose=None):
logging.getLogger().setLevel(logging.ERROR)
return CollectorStatus.print_latest_status(verbose=verbose)
return monagent.common.check_status.CollectorStatus.print_latest_status(verbose=verbose)
def run(self, config=None):
"""Main loop of the collector"""
"""Main loop of the collector.
"""
# Gracefully exit on sigterm.
signal.signal(signal.SIGTERM, self._handle_sigterm)
@ -87,15 +85,15 @@ class CollectorDaemon(Daemon):
signal.signal(signal.SIGINT, self._handle_sigterm)
# Save the agent start-up stats.
CollectorStatus().persist()
monagent.common.check_status.CollectorStatus().persist()
# Intialize the collector.
if config is None:
config = get_config(parse_args=True)
config = monagent.common.config.get_config(parse_args=True)
# Load the checks_d checks
checksd = load_check_directory(config)
self.collector = Collector(config, http_emitter, checksd)
checksd = monagent.common.config.load_check_directory(config)
self.collector = checks.collector.Collector(config, monagent.common.emitter.http_emitter, checksd)
# Configure the watchdog.
check_frequency = int(config['check_freq'])
@ -127,9 +125,9 @@ class CollectorDaemon(Daemon):
if config.get('profile', False) and config.get('profile').lower() == 'yes' and profiled:
try:
profiler.disable()
import cStringIO
import pstats
from cStringIO import StringIO
s = StringIO()
s = cStringIO.StringIO()
ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative")
ps.print_stats()
log.debug(s.getvalue())
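The profiling block above, runnable in isolation; the busy-work line stands in for a collector iteration:

import cProfile
import cStringIO
import pstats

profiler = cProfile.Profile()
profiler.enable()
sum(i * i for i in range(100000))  # stand-in workload
profiler.disable()

s = cStringIO.StringIO()
ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative')
ps.print_stats()
print(s.getvalue()[:120])  # head of the profile report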
@ -149,7 +147,7 @@ class CollectorDaemon(Daemon):
# Now clean-up.
try:
CollectorStatus.remove_latest_status()
monagent.common.check_status.CollectorStatus.remove_latest_status()
except Exception:
pass
@ -162,8 +160,9 @@ class CollectorDaemon(Daemon):
def _get_watchdog(check_freq, agentConfig):
watchdog = None
if agentConfig.get("watchdog", True):
watchdog = Watchdog(check_freq * WATCHDOG_MULTIPLIER,
max_mem_mb=agentConfig.get('limit_memory_consumption', None))
watchdog = monagent.common.util.Watchdog(check_freq * WATCHDOG_MULTIPLIER,
max_mem_mb=agentConfig.get('limit_memory_consumption',
None))
watchdog.reset()
return watchdog
@ -176,12 +175,12 @@ class CollectorDaemon(Daemon):
log.info("Running an auto-restart.")
if self.collector:
self.collector.stop()
sys.exit(AgentSupervisor.RESTART_EXIT_STATUS)
sys.exit(monagent.common.daemon.AgentSupervisor.RESTART_EXIT_STATUS)
def main():
options, args = get_parsed_args()
agentConfig = get_config(options=options)
options, args = monagent.common.config.get_parsed_args()
agentConfig = monagent.common.config.get_config(options=options)
# todo autorestart isn't used remove
autorestart = agentConfig.get('autorestart', False)
@ -207,7 +206,7 @@ def main():
sys.stderr.write("Unknown command: %s\n" % command)
return 3
pid_file = PidFile('monasca-agent')
pid_file = monagent.common.util.PidFile('monasca-agent')
if options.clean:
pid_file.clean()
@ -215,7 +214,7 @@ def main():
agent = CollectorDaemon(pid_file.get_path(), autorestart)
if command in START_COMMANDS:
log.info('Agent version %s' % get_version())
log.info('Agent version %s' % monagent.common.config.get_version())
if 'start' == command:
log.info('Start daemon')
@ -247,34 +246,29 @@ def main():
def parent_func():
agent.start_event = False
AgentSupervisor.start(parent_func, child_func)
monagent.common.daemon.AgentSupervisor.start(parent_func, child_func)
else:
# Run in the standard foreground.
agent.run(config=agentConfig)
elif 'check' == command:
check_name = args[1]
try:
# Try the old-style check first
print(getattr(collector.checks.collector, check_name)(log).check(agentConfig))
except Exception:
# If not an old-style check, try checks_d
checks = load_check_directory(agentConfig)
for check in checks['initialized_checks']:
if check.name == check_name:
checks = monagent.common.config.load_check_directory(agentConfig)
for check in checks['initialized_checks']:
if check.name == check_name:
check.run()
print("Metrics: ")
check.get_metrics(prettyprint=True)
if len(args) == 3 and args[2] == 'check_rate':
print("Running 2nd iteration to capture rate metrics")
time.sleep(1)
check.run()
print("Metrics: ")
check.get_metrics(prettyprint=True)
if len(args) == 3 and args[2] == 'check_rate':
print("Running 2nd iteration to capture rate metrics")
time.sleep(1)
check.run()
print("Metrics: ")
check.get_metrics(prettyprint=True)
elif 'check_all' == command:
print("Loading check directory...")
checks = load_check_directory(agentConfig)
checks = monagent.common.config.load_check_directory(agentConfig)
print("...directory loaded.\n")
for check in checks['initialized_checks']:
print("#" * 80)
@ -285,12 +279,12 @@ def main():
print("#" * 80 + "\n\n")
elif 'configcheck' == command or 'configtest' == command:
osname = get_os()
osname = monagent.common.util.get_os()
all_valid = True
for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")):
for conf_path in glob.glob(os.path.join(monagent.common.config.get_confd_path(osname), "*.yaml")):
basename = os.path.basename(conf_path)
try:
check_yaml(conf_path)
monagent.common.config.check_yaml(conf_path)
except Exception as e:
all_valid = False
print("%s contains errors:\n %s" % (basename, e))
@ -307,14 +301,14 @@ def main():
elif 'jmx' == command:
if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
if len(args) < 2 or args[1] not in jmxfetch.JMX_LIST_COMMANDS.keys():
print("#" * 80)
print("JMX tool to be used to help configuring your JMX checks.")
print("See http://docs.datadoghq.com/integrations/java/ for more information")
print("#" * 80)
print("\n")
print("You have to specify one of the following command:")
for command, desc in JMX_LIST_COMMANDS.iteritems():
for command, desc in jmxfetch.JMX_LIST_COMMANDS.iteritems():
print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr")
print("\n")
@ -322,11 +316,11 @@ def main():
else:
jmx_command = args[1]
checks_list = args[2:]
confd_directory = get_confd_path(get_os())
should_run = JMXFetch.init(
confd_directory = monagent.common.config.get_confd_path(monagent.common.util.get_os())
should_run = jmxfetch.JMXFetch.init(
confd_directory,
agentConfig,
get_logging_config(),
monagent.common.config.get_logging_config(),
15,
jmx_command,
checks_list,

View File

@ -1,7 +1,7 @@
from datetime import datetime
import re
from collector.dogstream import common
from monagent.collector.dogstream import common
LOG4J_PRIORITY = [

View File

@ -1,14 +1,11 @@
"""
Custom parser for supervisord log suitable for use by Datadog 'dogstreams'
"""Custom parser for supervisord log suitable for use by Datadog 'dogstreams'
Add to datadog.conf as follows:
dogstreams: [path_to_supervisord.log]:datadog.streams.supervisord:parse_supervisord
dogstreams: [path_to_supervisord.log]:datadog.streams.supervisord:parse_supervisord
"""
from datetime import datetime
import time
import re
import time
EVENT_TYPE = "supervisor"
@ -37,8 +34,8 @@ program_matcher = re.compile("^\w+:? '?(?P<program>\w+)'?")
def parse_supervisord(log, line):
"""
Parse the supervisord.log line into a dogstream event
"""Parse the supervisord.log line into a dogstream event.
"""
if len(line) == 0:
log.info("Skipping empty line of supervisord.log")
@ -71,9 +68,9 @@ def parse_supervisord(log, line):
return None
if __name__ == "__main__":
import sys
import pprint
import logging
import pprint
import sys
logging.basicConfig()
log = logging.getLogger()
lines = open(sys.argv[1]).readlines()

View File

@ -1,19 +1,14 @@
# std
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
import os
import logging
import glob
import logging
import os
import signal
import subprocess
import tempfile
import time
from monagent.common.util import PidFile, get_os
import yaml
import monagent.common.util
log = logging.getLogger(__name__)
@ -55,7 +50,7 @@ class InvalidJMXConfiguration(Exception):
class JMXFetch(object):
pid_file = PidFile("jmxfetch")
pid_file = monagent.common.util.PidFile("jmxfetch")
pid_file_path = pid_file.get_path()
@classmethod
@ -95,27 +90,26 @@ class JMXFetch(object):
@classmethod
def should_run(cls, confd_path, checks_list):
"""
Return a tuple (jmx_checks, invalid_checks, java_bin_path, java_options)
'''Return a tuple (jmx_checks, invalid_checks, java_bin_path, java_options).
jmx_checks: list of yaml files that are jmx checks
(they have the is_jmx flag enabled or they are in JMX_CHECKS)
and that have at least one instance configured
jmx_checks: list of yaml files that are jmx checks
(they have the is_jmx flag enabled or they are in JMX_CHECKS)
and that have at least one instance configured
invalid_checks: dictionary whose keys are check names that are JMX checks but
they have a bad configuration. Values of the dictionary are exceptions generated
when checking the configuration
invalid_checks: dictionary whose keys are check names that are JMX checks but
they have a bad configuration. Values of the dictionary are exceptions generated
when checking the configuration
java_bin_path: is the path to the java executable. It was
previously set in the "instance" part of the yaml file of the
jmx check. So we need to parse yaml files to get it.
We assume that this value is alwayws the same for every jmx check
so we can return the first value returned
java_bin_path: is the path to the java executable. It was
previously set in the "instance" part of the yaml file of the
jmx check. So we need to parse yaml files to get it.
We assume that this value is always the same for every jmx check
so we can return the first value returned
java_options: is string contains options that will be passed to java_bin_path
We assume that this value is alwayws the same for every jmx check
so we can return the first value returned
"""
java_options: is a string containing options that will be passed to java_bin_path
We assume that this value is always the same for every jmx check
so we can return the first value returned
'''
jmx_checks = []
java_bin_path = None
@ -129,6 +123,10 @@ class JMXFetch(object):
if os.path.exists(conf):
f = open(conf)
try:
if hasattr(yaml, 'CLoader'):
Loader = yaml.CLoader
else:
Loader = yaml.Loader
check_config = yaml.load(f.read(), Loader=Loader)
assert check_config is not None
f.close()
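The loader selection above, runnable on its own assuming PyYAML is installed; CLoader only exists when PyYAML was built against libyaml, hence the hasattr probe:

import yaml

if hasattr(yaml, 'CLoader'):
    Loader = yaml.CLoader  # C implementation, much faster
else:
    Loader = yaml.Loader   # pure-Python fallback

print(yaml.load('init_config:\n  key: value', Loader=Loader))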
@ -236,7 +234,7 @@ class JMXFetch(object):
except Exception:
return False
if get_os() != 'windows':
if monagent.common.util.get_os() != 'windows':
try:
os.kill(pid, 0)
# os.kill(pid, 0) will throw an exception if pid is not running
@ -292,7 +290,7 @@ class JMXFetch(object):
@classmethod
def get_path_to_jmxfetch(cls):
if get_os() != 'windows':
if monagent.common.util.get_os() != 'windows':
return os.path.realpath(
os.path.join(os.path.abspath(__file__), "..", "../collector/checks", "libs",
JMX_FETCH_JAR_NAME))

View File

@ -1,16 +1,18 @@
""" Tools for loading Python modules from arbitrary locations.
"""Tools for loading Python modules from arbitrary locations.
"""
import os
import imp
import os
import sys
# todo seems to be only used by dogstream at this point, possibly remove?
def imp_type_for_filename(filename):
"""Given the name of a Python module, return a type description suitable to
be passed to imp.load_module()"""
"""Given the name of a Python module, return a type description suitable to be passed to imp.load_module().
"""
for type_data in imp.get_suffixes():
extension = type_data[0]
if filename.endswith(extension):
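A fuller sketch of the suffix lookup the hunk shows (Python 2 imp; the exact tuple values vary by interpreter):

import imp

def imp_type_for_filename(filename):
    # imp.get_suffixes() yields (extension, mode, type) triples.
    for type_data in imp.get_suffixes():
        extension = type_data[0]
        if filename.endswith(extension):
            return type_data
    return None

print(imp_type_for_filename('common.py'))  # e.g. ('.py', 'U', 1)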
@ -19,7 +21,9 @@ def imp_type_for_filename(filename):
def load_qualified_module(full_module_name, path=None):
"""Load a module which may be within a package"""
"""Load a module which may be within a package.
"""
remaining_pieces = full_module_name.split('.')
done_pieces = []
file_obj = None
@ -40,7 +44,8 @@ def module_name_for_filename(filename):
"""Given the name of a Python file, find an appropropriate module name.
This involves determining whether the file is within a package, and
determining the name of same."""
determining the name of same.
"""
all_segments = filename.split(os.sep)
path_elements = all_segments[:-1]
module_elements = [all_segments[-1].rsplit('.', 1)[0]]
@ -52,10 +57,10 @@ def module_name_for_filename(filename):
def get_module(name):
"""Given either an absolute path to a Python file or a module name, load
and return a Python module.
"""Given either an absolute path to a Python file or a module name, load and return a Python module.
If the module is already loaded, takes no action."""
If the module is already loaded, takes no action.
"""
if name.startswith('/'):
basename, modulename = module_name_for_filename(name)
path = [basename]
@ -68,8 +73,9 @@ def get_module(name):
def load(config_string, default_name=None):
"""Given a module name and an object expected to be contained within,
return said object"""
"""Given a module name and an object expected to be contained within, return said object.
"""
(module_name, object_name) = (config_string.rsplit(':', 1) + [default_name])[:2]
module = get_module(module_name)
if object_name:
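The module:object split used by load(), sketched with a hypothetical config string:

from __future__ import print_function

config_string = 'monagent.common.util:get_hostname'  # hypothetical
default_name = None

(module_name, object_name) = (config_string.rsplit(':', 1)
                              + [default_name])[:2]
print(module_name, object_name)  # monagent.common.util get_hostname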

View File

@ -709,6 +709,6 @@ def get_jmx_status():
return check_statuses
except Exception as e:
except Exception:
log.exception("Couldn't load latest jmx status")
return []

View File

@ -75,8 +75,6 @@ def _windows_commondata_path():
import ctypes
from ctypes import wintypes, windll
CSIDL_COMMON_APPDATA = 35
_SHGetFolderPath = windll.shell32.SHGetFolderPathW
_SHGetFolderPath.argtypes = [wintypes.HWND,
ctypes.c_int,
@ -84,7 +82,6 @@ def _windows_commondata_path():
wintypes.DWORD, wintypes.LPCWSTR]
path_buf = wintypes.create_unicode_buffer(wintypes.MAX_PATH)
result = _SHGetFolderPath(0, CSIDL_COMMON_APPDATA, 0, 0, path_buf)
return path_buf.value
@ -488,7 +485,6 @@ def get_win32service_file(osname, filename):
def check_yaml(conf_path):
f = open(conf_path)
check_name = os.path.basename(conf_path).split('.')[0]
try:
check_config = yaml.load(f.read(), Loader=Loader)
assert 'init_config' in check_config, "No 'init_config' section found"
@ -588,7 +584,6 @@ def load_check_directory(agent_config):
# Check if the config exists OR we match the old-style config
conf_path = os.path.join(confd_path, '%s.yaml' % check_name)
if os.path.exists(conf_path):
f = open(conf_path)
try:
check_config = check_yaml(conf_path)
except Exception as e:

View File

@ -35,7 +35,6 @@ class AgentSupervisor(object):
`child_func` is a function that should be run by the forked child
that will auto-restart with the RESTART_EXIT_STATUS.
'''
exit_code = cls.RESTART_EXIT_STATUS
# Allow the child process to die on SIGTERM
signal.signal(signal.SIGTERM, cls._handle_sigterm)
@ -47,7 +46,6 @@ class AgentSupervisor(object):
# The parent waits on the child.
cls.child_pid = pid
_, status = os.waitpid(pid, 0)
exit_code = status >> 8
if parent_func is not None:
parent_func()
else:

View File

@ -172,7 +172,7 @@ def get_hostname(config=None):
if hostname is None:
try:
socket_hostname = socket.gethostname()
except socket.error as e:
except socket.error:
socket_hostname = None
if socket_hostname and is_valid_hostname(socket_hostname):
hostname = socket_hostname

View File

@ -114,5 +114,5 @@ class Reporter(threading.Thread):
event_count=event_count,
).persist()
except Exception as e:
except Exception:
log.exception("Error flushing metrics")

View File

@ -38,7 +38,7 @@ class Server(object):
try:
self.forward_udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
self.forward_udp_sock.connect((forward_to_host, forward_to_port))
except Exception as e:
except Exception:
log.exception("Error while setting up connection to external statsd server")
@staticmethod
@ -83,7 +83,7 @@ class Server(object):
elif m[0] == u'#':
event['dimensions'] = sorted(m[1:].split(u','))
return event
except IndexError as ValueError:
except IndexError:
raise Exception(u'Unparseable event packet: %s' % packet)
@staticmethod
@ -194,7 +194,7 @@ class Server(object):
raise
except (KeyboardInterrupt, SystemExit):
break
except Exception as e:
except Exception:
log.exception('Error receiving datagram')
def stop(self):

View File

@ -21,6 +21,8 @@ from ddagent import Application
from win32.common import handle_exe_click
from collector.jmxfetch import JMXFetch
from monagent.common.config import get_config, load_check_directory, set_win32_cert_path
log = logging.getLogger(__name__)
RESTART_INTERVAL = 24 * 60 * 60 # Defaults to 1 day
@ -118,8 +120,7 @@ class DDAgent(multiprocessing.Process):
def run(self):
log.debug("Windows Service - Starting collector")
emitters = self.get_emitters()
systemStats = get_system_stats()
self.collector = Collector(self.config, emitters, systemStats)
self.collector = Collector(self.config, emitters)
# Load the checks_d checks
checksd = load_check_directory(self.config)

View File

@ -1,4 +1,4 @@
# Copyright © 2009-2010 CEA
# Copyright © 2009-2010 CEA
# Pierre Raybaut
# Licensed under the terms of the CECILL License
# Modified for Datadog
@ -19,6 +19,8 @@ import win32service
# GUI Imports
from guidata.qt.QtCore import SIGNAL, Qt, QSize, QPoint, QTimer
from guidata.qt.QtGui import QInputDialog, QWidget, QFont, QLabel, QGroupBox, QHBoxLayout, QSystemTrayIcon
from guidata.qt.QtGui import QVBoxLayout, QPushButton, QSplitter, QListWidget, QMenu, QMessageBox
from guidata.configtools import get_icon, get_family, MONOSPACE
from guidata.qthelpers import get_std_icon

View File

@ -6,6 +6,7 @@ import collections
class Plugins(collections.defaultdict):
"""A container for the plugin configurations used by the monasca-agent.
This is essentially a defaultdict(dict) but put into a class primarily to make the interface clear, also
to add a couple of helper methods.
Each plugin config is stored with the key being its config name (excluding .yaml).
@ -21,7 +22,8 @@ class Plugins(collections.defaultdict):
raise NotImplementedError
def merge(self, other):
"""Do a deep merge with precedence going to other (as is the case with update)
"""Do a deep merge with precedence going to other (as is the case with update).
"""
# Implemented as a function so it can be used for arbitrary dictionaries not just self, this is needed
# for the recursive nature of the merge.
@ -29,7 +31,8 @@ class Plugins(collections.defaultdict):
def deep_merge(adict, other):
"""A recursive merge of two dictionaries including combining of any lists within the data structure
"""A recursive merge of two dictionaries including combining of any lists within the data structure.
"""
for key, value in other.iteritems():
if key in adict:

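The docstrings above describe the merge semantics; a minimal sketch of deep_merge under those rules (the real helper in agent_config.py may handle more cases):

def deep_merge(adict, other):
    """Recursively merge other into adict; lists combine, other wins."""
    for key, value in other.iteritems():  # Python 2, as in the codebase
        if key in adict:
            if isinstance(adict[key], dict) and isinstance(value, dict):
                deep_merge(adict[key], value)
            elif isinstance(adict[key], list) and isinstance(value, list):
                adict[key] = adict[key] + value
            else:
                adict[key] = value
        else:
            adict[key] = value
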
View File

@ -1,3 +1,6 @@
from plugin import Plugin
from utils import find_process_cmdline, find_process_name, watch_process, service_api_check
from utils import find_process_cmdline
from utils import find_process_name
from utils import watch_process
from utils import service_api_check
from service_plugin import ServicePlugin

View File

@ -1,11 +1,13 @@
"""Classes for detection of running resources to be monitored.
Detection classes should be platform independent
"""
class Plugin(object):
"""Abstract class implemented by the monasca-agent plugin detection classes
"""Abstract class implemented by the monasca-agent plugin detection classes.
"""
# todo these should include dependency detection
@ -18,22 +20,28 @@ class Plugin(object):
self._detect()
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
"""Run detection, set self.available True if the service is detected.
"""
raise NotImplementedError
def build_config(self):
"""Build the config as a Plugins object and return.
"""
raise NotImplementedError
def dependencies_installed(self):
"""return True if dependencies are installed
"""Return True if dependencies are installed.
"""
raise NotImplementedError
@property
def name(self):
"""Return _name if set otherwise the class name"""
"""Return _name if set otherwise the class name.
"""
if '_name' in self.__dict__:
return self._name
else:

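Taken together, the methods above define the detection interface implemented by the plugins later in this diff; a hypothetical minimal subclass ('foo' is a placeholder service name):

import monsetup.agent_config
import monsetup.detection

class Foo(monsetup.detection.Plugin):
    """Detect a foo daemon and set up configuration to monitor it."""

    def _detect(self):
        """Run detection, set self.available True if the service is detected."""
        if monsetup.detection.find_process_cmdline('foo') is not None:
            self.available = True

    def build_config(self):
        """Build the config as a Plugins object and return."""
        config = monsetup.agent_config.Plugins()
        config.merge(monsetup.detection.watch_process(['foo']))
        return config

    def dependencies_installed(self):
        return True
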
View File

@ -1,7 +1,7 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Ceilometer(ServicePlugin):
class Ceilometer(monsetup.detection.ServicePlugin):
"""Detect Ceilometer daemons and setup configuration to monitor them."""

View File

@ -1,7 +1,7 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Cinder(ServicePlugin):
class Cinder(monsetup.detection.ServicePlugin):
"""Detect Cinder daemons and setup configuration to monitor them."""

View File

@ -1,7 +1,7 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Glance(ServicePlugin):
class Glance(monsetup.detection.ServicePlugin):
"""Detect Glance daemons and setup configuration to monitor them."""

View File

@ -1,30 +1,34 @@
import collections
import logging
from monsetup.detection import Plugin, find_process_cmdline, watch_process
from monsetup import agent_config
import monsetup.agent_config
import monsetup.detection
log = logging.getLogger(__name__)
class Kafka(Plugin):
class Kafka(monsetup.detection.Plugin):
"""Detect Kafka daemons and sets up configuration to monitor them.
This plugin configures the kafka_consumer plugin and does not configure any jmx based checks against kafka.
Note this plugin will pull the same information from kafka on each node in the cluster it runs on.
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
if find_process_cmdline('kafka') is not None:
"""Run detection, set self.available True if the service is detected.
"""
if monsetup.detection.find_process_cmdline('kafka') is not None:
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
config = agent_config.Plugins()
config = monsetup.agent_config.Plugins()
# First watch the process
config.merge(watch_process(['kafka']))
config.merge(monsetup.detection.watch_process(['kafka']))
log.info("\tWatching the kafka process.")
if self.dependencies_installed():

View File

@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Keystone(ServicePlugin):
class Keystone(monsetup.detection.ServicePlugin):
"""Detect Keystone daemons and setup configuration to monitor them."""
"""Detect Keystone daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True):
service_params = {

View File

@ -1,28 +1,33 @@
"""Classes for monitoring the monitoring server stack.
Covering mon-persister, mon-api and mon-thresh.
Kafka, mysql, vertica and influxdb are covered by other detection plugins. Mon-notification uses statsd.
"""
import logging
from monsetup.detection import Plugin, find_process_cmdline, watch_process
from monsetup import agent_config
import monsetup.agent_config
import monsetup.detection
log = logging.getLogger(__name__)
class MonPersister(Plugin):
class MonPersister(monsetup.detection.Plugin):
"""Detect mon_persister and setup monitoring.
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
if find_process_cmdline('mon-persister') is not None:
"""Run detection, set self.available True if the service is detected.
"""
if monsetup.detection.find_process_cmdline('mon-persister') is not None:
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
log.info("\tEnabling the mon persister healthcheck")
return dropwizard_health_check('mon-persister', 'http://localhost:8091/healthcheck')
@ -35,14 +40,15 @@ class MonPersister(Plugin):
return True
class MonAPI(Plugin):
class MonAPI(monsetup.detection.Plugin):
"""Detect mon_api and setup monitoring.
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
if find_process_cmdline('mon-api') is not None:
if monsetup.detection.find_process_cmdline('mon-api') is not None:
self.available = True
def build_config(self):
@ -59,28 +65,35 @@ class MonAPI(Plugin):
return True
class MonThresh(Plugin):
class MonThresh(monsetup.detection.Plugin):
"""Detect the running mon-thresh and monitor"""
"""Detect the running mon-thresh and monitor.
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
if find_process_cmdline('mon-thresh') is not None:
"""Run detection, set self.available True if the service is detected.
"""
if monsetup.detection.find_process_cmdline('mon-thresh') is not None:
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
log.info("\tWatching the mon-thresh process.")
return watch_process(['mon-thresh'])
return monsetup.detection.watch_process(['mon-thresh'])
def dependencies_installed(self):
return True
def dropwizard_health_check(name, url):
"""Setup a dropwizard heathcheck to be watched by the http_check plugin."""
config = agent_config.Plugins()
"""Setup a dropwizard heathcheck to be watched by the http_check plugin.
"""
config = monsetup.agent_config.Plugins()
config['http_check'] = {'init_config': None,
'instances': [{'name': name,
'url': url,

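Given the helper above, the call made in MonPersister.build_config yields an http_check entry along these lines (instance fields beyond name and url are truncated in this hunk and omitted here):

config = dropwizard_health_check('mon-persister',
                                 'http://localhost:8091/healthcheck')
# config['http_check']['instances'][0]['name'] == 'mon-persister'
# config['http_check']['instances'][0]['url'] == 'http://localhost:8091/healthcheck'
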
View File

@ -1,16 +1,17 @@
import logging
from monsetup.detection import Plugin, find_process_name, watch_process
from monsetup import agent_config
import monsetup.agent_config
import monsetup.detection
log = logging.getLogger(__name__)
mysql_conf = '/root/.my.cnf'
class MySQL(Plugin):
class MySQL(monsetup.detection.Plugin):
"""Detect MySQL daemons and setup configuration to monitor them.
This plugin needs user/pass info for mysql setup, this is
best placed in /root/.my.cnf in a format such as
[client]
@ -19,16 +20,19 @@ class MySQL(Plugin):
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected"""
if find_process_name('mysqld') is not None:
"""Run detection, set self.available True if the service is detected.
"""
if monsetup.detection.find_process_name('mysqld') is not None:
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
config = agent_config.Plugins()
config = monsetup.agent_config.Plugins()
# First watch the process
config.merge(watch_process(['mysqld']))
config.merge(monsetup.detection.watch_process(['mysqld']))
log.info("\tWatching the mysqld process.")
# Attempt login, requires either an empty root password from localhost

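The hunk truncates the docstring's example; a typical /root/.my.cnf client section of the kind it refers to would look like this (placeholder values, not taken from the source file):

[client]
user = root
password = <root password here>
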
View File

@ -1,4 +1,5 @@
import os
import yaml
from monsetup.detection import Plugin
@ -8,14 +9,18 @@ from monsetup import agent_config
class Network(Plugin):
"""No configuration here, working networking is assumed so this is either on or off.
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
"""Run detection, set self.available True if the service is detected.
"""
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
# A bit silly to parse the yaml only for it to be converted back but this
# plugin is the exception not the rule

View File

@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Neutron(ServicePlugin):
class Neutron(monsetup.detection.ServicePlugin):
"""Detect Neutron daemons and setup configuration to monitor them."""
"""Detect Neutron daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True):
service_params = {

View File

@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Nova(ServicePlugin):
class Nova(monsetup.detection.ServicePlugin):
"""Detect Nova daemons and setup configuration to monitor them."""
"""Detect Nova daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True):
service_params = {

View File

@ -1,30 +1,35 @@
import os
import yaml
from monsetup.detection import Plugin, find_process_name
from monsetup import agent_config
import monsetup.agent_config
import monsetup.detection
class Postfix(Plugin):
class Postfix(monsetup.detection.Plugin):
"""If postfix is running install the default config.
"""If postfix is running install the default config
"""
# todo this is is disabled as postfix requires passwordless sudo for the
# monasca-agent user, a bad practice
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
if find_process_name('postfix') is not None:
"""Run detection, set self.available True if the service is detected.
"""
if monsetup.detection.find_process_name('postfix') is not None:
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
# A bit silly to parse the yaml only for it to be converted back but this
# plugin is the exception not the rule
with open(os.path.join(self.template_dir, 'conf.d/postfix.yaml.example'), 'r') as postfix_template:
default_net_config = yaml.load(postfix_template.read())
config = agent_config.Plugins()
config = monsetup.agent_config.Plugins()
config['postfix'] = default_net_config
return config

View File

@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin
import monsetup.detection
class Swift(ServicePlugin):
class Swift(monsetup.detection.ServicePlugin):
"""Detect Swift daemons and setup configuration to monitor them."""
"""Detect Swift daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True):
service_params = {

View File

@ -1,30 +1,35 @@
import logging
import os
import yaml
from monsetup.detection import Plugin, find_process_cmdline, watch_process
from monsetup import agent_config
import monsetup.agent_config
import monsetup.detection
log = logging.getLogger(__name__)
class Zookeeper(Plugin):
class Zookeeper(monsetup.detection.Plugin):
"""Detect Zookeeper daemons and setup configuration to monitor them.
"""
def _detect(self):
"""Run detection, set self.available True if the service is detected."""
if find_process_cmdline('zookeeper') is not None:
"""Run detection, set self.available True if the service is detected.
"""
if monsetup.detection.find_process_cmdline('zookeeper') is not None:
self.available = True
def build_config(self):
"""Build the config as a Plugins object and return.
"""
config = agent_config.Plugins()
config = monsetup.agent_config.Plugins()
# First watch the process
log.info("\tWatching the zookeeper process.")
config.merge(watch_process(['zookeeper']))
config.merge(monsetup.detection.watch_process(['zookeeper']))
log.info("\tEnabling the zookeeper plugin")
with open(os.path.join(self.template_dir, 'conf.d/zk.yaml.example'), 'r') as zk_template:

View File

@ -1,8 +1,11 @@
import logging
from plugin import Plugin
from monsetup import agent_config
from monsetup.detection import find_process_cmdline, watch_process, service_api_check
from monsetup.detection import find_process_cmdline
from monsetup.detection import service_api_check
from monsetup.detection import watch_process
log = logging.getLogger(__name__)
@ -11,6 +14,7 @@ log = logging.getLogger(__name__)
class ServicePlugin(Plugin):
"""Base class implemented by the monasca-agent plugin detection classes
for OpenStack Services
"""
@ -23,7 +27,9 @@ class ServicePlugin(Plugin):
super(ServicePlugin, self).__init__(kwargs['template_dir'], kwargs['overwrite'])
def _detect(self):
"""Run detection"""
"""Run detection.
"""
self.found_processes = []
for process in self.process_names:
@ -34,6 +40,7 @@ class ServicePlugin(Plugin):
def build_config(self):
"""Build the config as a Plugins object and return.
"""
config = agent_config.Plugins()
for process in self.found_processes:
@ -50,6 +57,7 @@ class ServicePlugin(Plugin):
return config
def dependencies_installed(self):
"""return True if dependencies are installed
"""Return True if dependencies are installed.
"""
return True

View File

@ -6,7 +6,8 @@ from monsetup import agent_config
def find_process_cmdline(search_string):
"""Simple function to search running process for one with cmdline containing
"""Simple function to search running process for one with cmdline containing.
"""
for process in psutil.process_iter():
for arg in process.cmdline():
@ -18,6 +19,7 @@ def find_process_cmdline(search_string):
def find_process_name(pname):
"""Simple function to search running process for one with pname.
"""
for process in psutil.process_iter():
if pname == process.name():
@ -28,6 +30,7 @@ def find_process_name(pname):
def watch_process(search_strings, service=None, component=None):
"""Takes a list of process search strings and returns a Plugins object with the config set.
This was built as a helper as many plugins setup process watching
"""
config = agent_config.Plugins()
@ -44,7 +47,9 @@ def watch_process(search_strings, service=None, component=None):
def service_api_check(name, url, pattern, service=None, component=None):
"""Setup a service api to be watched by the http_check plugin."""
"""Setup a service api to be watched by the http_check plugin.
"""
config = agent_config.Plugins()
parameters = {'name': name,
'url': url,

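A short usage sketch of the helpers above, mirroring the detection plugins earlier in this commit (the URL and pattern arguments are assumptions, not values from the source):

import monsetup.detection

if monsetup.detection.find_process_cmdline('nova-api') is not None:
    config = monsetup.detection.watch_process(['nova-api'])
    config.merge(monsetup.detection.service_api_check(
        'nova-api', 'http://localhost:8774/v2.0', '.*version.*'))
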
View File

@ -9,13 +9,22 @@ import pwd
import socket
import subprocess
import sys
import yaml
import platform
import yaml
import agent_config
from detection.plugins import kafka, mon, mysql, network, zookeeper
from detection.plugins import nova, glance, cinder, neutron, swift
from detection.plugins import keystone, ceilometer
from detection.plugins import kafka
from detection.plugins import mon
from detection.plugins import mysql
from detection.plugins import network
from detection.plugins import zookeeper
from detection.plugins import nova
from detection.plugins import glance
from detection.plugins import cinder
from detection.plugins import neutron
from detection.plugins import swift
from detection.plugins import keystone
from detection.plugins import ceilometer
from service import sysv
# List of all detection plugins to run

View File

@ -1,11 +1,14 @@
"""Classes implementing different methods for running monasca-agent on startup as well as starting the process immediately
"""Classes implementing different methods for running monasca-agent on startup as well as starting the process immediately.
"""
import psutil
class Service(object):
"""Abstract base class implementing the interface for various service types."""
"""Abstract base class implementing the interface for various service types.
"""
def __init__(self, config_dir, log_dir, name='monasca-agent'):
self.config_dir = config_dir
@ -14,29 +17,34 @@ class Service(object):
def enable(self):
"""Sets monasca-agent to start on boot.
Generally this requires running as super user
"""
raise NotImplementedError
def start(self, restart=True):
"""Starts monasca-agent
"""Starts monasca-agent.
If the agent is running and restart is True, restart
"""
raise NotImplementedError
def stop(self):
"""Stops monasca-agent
"""Stops monasca-agent.
"""
raise NotImplementedError
def is_enabled(self):
"""Returns True if monasca-agent is setup to start on boot, false otherwise
"""Returns True if monasca-agent is setup to start on boot, false otherwise.
"""
raise NotImplementedError
@staticmethod
def is_running():
"""Returns True if monasca-agent is running, false otherwise
"""Returns True if monasca-agent is running, false otherwise.
"""
# Looking for the supervisor process not the individual components
for process in psutil.process_iter():

View File

@ -1,20 +1,23 @@
"""System V style service.
"""
from glob import glob
import glob
import logging
import os
import pwd
import subprocess
from . import Service
import service
log = logging.getLogger(__name__)
class SysV(Service):
class SysV(service.Service):
def __init__(self, init_template, config_dir, log_dir, name='monasca-agent', username='monasca-agent'):
"""Setup this service with the given init template"""
"""Setup this service with the given init template.
"""
super(SysV, self).__init__(config_dir, log_dir, name)
self.init_script = '/etc/init.d/%s' % self.name
self.init_template = init_template
@ -22,6 +25,7 @@ class SysV(Service):
def enable(self):
"""Sets monasca-agent to start on boot.
Generally this requires running as super user
"""
# Create monasca-agent user/group if needed
@ -53,7 +57,8 @@ class SysV(Service):
log.info('Enabled {0} service via SysV init script'.format(self.name))
def start(self, restart=True):
"""Starts monasca-agent
"""Starts monasca-agent.
If the agent is running and restart is True, restart
"""
if not self.is_enabled():
@ -65,7 +70,8 @@ class SysV(Service):
return True
def stop(self):
"""Stops monasca-agent
"""Stops monasca-agent.
"""
if not self.is_enabled():
log.error('The service is not enabled')
@ -76,12 +82,13 @@ class SysV(Service):
return True
def is_enabled(self):
"""Returns True if monasca-agent is setup to start on boot, false otherwise
"""Returns True if monasca-agent is setup to start on boot, false otherwise.
"""
if not os.path.exists(self.init_script):
return False
if len(glob('/etc/rc?.d/S??monasca-agent')) > 0:
if len(glob.glob('/etc/rc?.d/S??monasca-agent')) > 0:
return True
else:
return False

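For illustration, driving the SysV implementation above end to end (the template and directory paths are placeholders):

svc = SysV('/usr/share/monasca/agent/monasca-agent.init.template',
           '/etc/monasca/agent', '/var/log/monasca/agent')
if not svc.is_enabled():
    svc.enable()  # installs the init script; generally requires super user
svc.start(restart=True)
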
View File

@ -1,6 +1,22 @@
from setuptools import setup
#!/usr/bin/env python
setup(
# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import setuptools
setuptools.setup(
setup_requires=['pbr'],
pbr=True
)
pbr=True)

View File

@ -75,9 +75,4 @@ def get_check(name, config_str):
raise Exception(
"Unable to import check %s. Missing a class that inherits AgentCheck" % name)
agent_config = {
'version': '0.1',
'api_key': 'tota'
}
return check_class.from_yaml(yaml_text=config_str, check_name=name)

View File

@ -73,7 +73,6 @@ class TestCacti(unittest.TestCase):
# Check once more to make sure last_ts ignores None vals when calculating
# where to start from
check.check(instances[0])
results3 = check.get_metrics()
last_ts2 = check.last_ts[rrd_dir + '/localhost_hdd_free_10.rrd.AVERAGE']
self.assertEqual(last_ts1, last_ts2)

View File

@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self):
aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start)

View File

@ -212,6 +212,7 @@ class TestDogstream(TailTestCase):
'dogstreams': '%s:tests.test_datadog:parse_ancient_function_plugin' %
self.log_file.name})
actual_output = plugdog.check(self.config, move_end=False)
self.assertEqual(expected_output, actual_output)
def test_dogstream_function_plugin(self):
"""Ensure that non-class-based stateful plugins work"""

View File

@ -26,6 +26,7 @@ class TestElastic(unittest.TestCase):
loop += 1
if loop >= MAX_WAIT:
break
return request
def setUp(self):
self.process = None

View File

@ -38,6 +38,7 @@ class HaproxyTestCase(unittest.TestCase):
loop += 1
if loop >= MAX_WAIT:
break
return request
def setUp(self):
self.process = None

View File

@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self):
aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start)

View File

@ -52,7 +52,7 @@ class TestMemCache(unittest.TestCase):
# Check that we got 21 metrics for a specific host
self.assertEqual(
len([t for t in r if t[3].get('dimensions') == {"instance": mythirdtag}]), 21, r)
len([t for t in r if t[3].get('dimensions') == {"instance": "mythirdtag"}]), 21, r)
def testDimensions(self):
raise SkipTest('Requires mcache')

View File

@ -79,7 +79,6 @@ class TestMongo(unittest.TestCase):
c1.admin.command("replSetInitiate")
# Sleep for 15s until replication is stable
time.sleep(30)
x = c1.admin.command("replSetGetStatus")
assert pymongo.Connection('localhost:%s' % PORT2)
except Exception:
logging.getLogger().exception("Cannot instantiate mongod properly")

View File

@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self):
aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start)

View File

@ -1,11 +1,17 @@
import unittest
from functools import reduce
import logging
import platform
import re
import unittest
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__file__)
from monagent.collector.checks.system.unix import *
from common import get_check
from monagent.collector.checks.system.unix import Cpu
from monagent.collector.checks.system.unix import Disk
from monagent.collector.checks.system.unix import IO
from monagent.collector.checks.system.unix import Memory
class TestSystem(unittest.TestCase):
@ -115,7 +121,7 @@ none 985964 1 985963 1% /lib/init/rw
def test_collecting_disk_metrics(self):
"""Testing disk stats gathering"""
if Platform.is_unix():
if platform.system() == 'Linux':
disk = Disk(logger, {})
res = disk.check()
# Assert we have disk & inode stats
@ -126,7 +132,7 @@ none 985964 1 985963 1% /lib/init/rw
def testMemory(self):
global logger
res = Memory(logger).check()
if Platform.is_linux():
if platform.system() == 'Linux':
for k in (
"swapTotal", "swapFree", "swapPctFree", "swapUsed", "physTotal", "physFree",
"physUsed", "physBuffers", "physCached", "physUsable", "physPctUsable",
@ -134,7 +140,7 @@ none 985964 1 985963 1% /lib/init/rw
assert k in res, res
assert res["swapTotal"] == res["swapFree"] + res["swapUsed"]
assert res["physTotal"] == res["physFree"] + res["physUsed"]
elif sys.platform == 'darwin':
elif platform.system() == 'Darwin':
for k in ("swapFree", "swapUsed", "physFree", "physUsed"):
assert k in res, res

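The replacement checks above work because platform.system() reports the OS name directly:

import platform
platform.system()  # 'Linux' on Linux, 'Darwin' on OS X, 'Windows' on Windows
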
View File

@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self):
aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start)

View File

@ -102,6 +102,7 @@ class PseudoAgent(object):
x = 0
while True:
x = random()
return x
@staticmethod
def hanging_net():

View File

@ -1,5 +1,6 @@
import unittest
import logging
import win32evtlog
from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest
@ -43,7 +44,6 @@ class WinEventLogTest(unittest.TestCase):
def setUp(self):
raise SkipTest("Requires win32evtlog module")
import win32evtlog
self.LOG_EVENTS = [
('Test 1', win32evtlog.EVENTLOG_WARNING_TYPE),
('Test 2', win32evtlog.EVENTLOG_ERROR_TYPE),

tox.ini
View File

@ -20,22 +20,12 @@ commands = flake8
commands = {posargs}
[flake8]
max-line-length = 120
# TODO: ignored checks should be enabled in the future
# H201 no 'except:' at least use 'except Exception:'
# H202 assertRaises Exception too broad
# H237 module is removed in Python
# H301 one import per line
# H305 imports not grouped correctly
# H306 imports not in alphabetical order
# H307 like imports should be grouped together
# H401 docstring should not start with a space
# H402 one line docstring needs punctuation.
# H403 multi line docstrings should end on a new line
# H404 multi line docstring should start without a leading new line
# H405 multi line docstring summary not separated with an empty line
# E501 Line length > 80 characters
# F401 module imported but unused
# F821 undefined name
# F841 local variable is assigned to but never used
ignore = E501,H201,H202,H237,H301,H305,H306,H307,H401,H402,H403,H404,H405,H904,F401,F403,F821,F841
# H302 import only modules
# H904 Wrap long lines in parentheses instead of a backslash (DEPRECATED)
ignore = E501, F401, H302, H904,
show-source = True
exclude=.venv,.git,.tox,dist,*egg,build