commit
52ed313d0c
|
@ -1,578 +1 @@
|
|||
"""Base class for Checks.
|
||||
|
||||
If you are writing your own checks you should subclass the AgentCheck class.
|
||||
The Check class is being deprecated so don't write new checks with it.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import os
|
||||
import traceback
|
||||
from pprint import pprint
|
||||
|
||||
from monagent.common import check_status
|
||||
from monagent.common.exceptions import NaN, CheckException, Infinity, UnknownValue
|
||||
from monagent.common.util import LaconicFilter, get_os, get_hostname
|
||||
from monagent.common.config import get_confd_path
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Constants
|
||||
|
||||
|
||||
# todo convert all checks to the new interface then remove this. Is the LaconicFilter on logs used elsewhere?
|
||||
#==============================================================================
|
||||
# DEPRECATED
|
||||
# ------------------------------
|
||||
# If you are writing your own check, you should inherit from AgentCheck
|
||||
# and not this class. This class will be removed in a future version
|
||||
# of the agent.
|
||||
#==============================================================================
|
||||
class Check(object):

    """(Abstract) base class for all old-style checks.

    Provides the ability to:
      * store 1 (and only 1) sample for gauges per metric/dimensions combination
      * compute rates for counters
      * only log error messages once (instead of each time they occur)

    DEPRECATED: inherit from AgentCheck for new checks.
    """

    def __init__(self, logger, agent_config=None):
        # where to store samples, indexed by metric_name
        # metric_name: {("sorted", "dimensions"): [(ts, value), (ts, value)],
        #               tuple(dimensions) are stored as a key since lists are not hashable
        #               None: [(ts, value), (ts, value)]}
        #               untagged values are indexed by None
        self.agent_config = agent_config
        self._sample_store = {}
        self._counters = {}  # metric_name: bool
        self.logger = logger
        try:
            self.logger.addFilter(LaconicFilter())
        except Exception:
            self.logger.exception("Trying to install laconic log filter and failed")

    @staticmethod
    def normalize(metric, prefix=None):
        """Turn a metric into a well-formed metric name: prefix.b.c"""
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name

    @staticmethod
    def normalize_device_name(device_name):
        """Lowercase a device name and replace spaces with underscores."""
        return device_name.strip().lower().replace(' ', '_')

    def counter(self, metric):
        """
        Treats the metric as a counter, i.e. computes its per second derivative
        ACHTUNG: Resets previous values associated with this metric.
        """
        self._counters[metric] = True
        self._sample_store[metric] = {}

    def is_counter(self, metric):
        "Is this metric a counter?"
        return metric in self._counters

    def gauge(self, metric):
        """
        Treats the metric as a gauge, i.e. keep the data as is
        ACHTUNG: Resets previous values associated with this metric.
        """
        self._sample_store[metric] = {}

    def is_metric(self, metric):
        "Has this metric been declared (as a gauge or a counter)?"
        return metric in self._sample_store

    def is_gauge(self, metric):
        "Is this metric a gauge (declared, and not a counter)?"
        return self.is_metric(metric) and not self.is_counter(metric)

    def get_metric_names(self):
        "Get all metric names"
        return self._sample_store.keys()

    def save_gauge(self, metric, value, timestamp=None, dimensions=None, hostname=None, device_name=None):
        """ Save a gauge value, declaring the metric as a gauge if needed. """
        if not self.is_gauge(metric):
            self.gauge(metric)
        self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)

    def save_sample(self, metric, value, timestamp=None, dimensions=None, hostname=None, device_name=None):
        """Save a simple sample, evict old values if needed

        :raises CheckException: if the metric is undeclared or dimensions is not a dict
        :raises NaN: if the value cannot be cast to a numeric value
        """
        if dimensions is None:
            dimensions = {}
        from common.util import cast_metric_val

        if timestamp is None:
            timestamp = time.time()
        if metric not in self._sample_store:
            raise CheckException("Saving a sample for an undefined metric: %s" % metric)
        try:
            value = cast_metric_val(value)
        except ValueError as ve:
            raise NaN(ve)

        # Sort and validate dimensions
        if dimensions is not None and not isinstance(dimensions, dict):
            raise CheckException("Dimensions must be a dictionary")

        # Data eviction rules
        key = (tuple(sorted(dimensions.items())), device_name)
        if self.is_gauge(metric):
            self._sample_store[metric][key] = ((timestamp, value, hostname, device_name), )
        elif self.is_counter(metric):
            if self._sample_store[metric].get(key) is None:
                self._sample_store[metric][key] = [(timestamp, value, hostname, device_name)]
            else:
                # Keep at most the previous sample plus the new one (enough for a rate)
                self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
                    [(timestamp, value, hostname, device_name)]
        else:
            raise CheckException("%s must be either gauge or counter, skipping sample at %s" %
                                 (metric, time.ctime(timestamp)))

        if self.is_gauge(metric):
            # store[metric][dimensions] = (ts, val) - only 1 value allowed
            assert len(self._sample_store[metric][key]) == 1, self._sample_store[metric]
        elif self.is_counter(metric):
            assert len(self._sample_store[metric][key]) in (1, 2), self._sample_store[metric]

    @classmethod
    def _rate(cls, sample1, sample2):
        "Simple rate between two (ts, value, hostname, device_name) samples"
        try:
            interval = sample2[0] - sample1[0]
            if interval == 0:
                raise Infinity()

            delta = sample2[1] - sample1[1]
            if delta < 0:
                # Counter went backwards (e.g. reset): no meaningful rate
                raise UnknownValue()

            return (sample2[0], delta / interval, sample2[2], sample2[3])
        except Infinity:
            raise
        except UnknownValue:
            raise
        except Exception as e:
            raise NaN(e)

    def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
        """Get (timestamp-epoch-style, value, hostname, device_name)

        :raises UnknownValue: if the metric is unknown or has too few samples
        """
        if dimensions is None:
            dimensions = {}

        # Get the proper dimensions
        key = (tuple(sorted(dimensions.items())), device_name)

        # Never seen this metric
        if metric not in self._sample_store:
            raise UnknownValue()

        # Not enough value to compute rate
        elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
            raise UnknownValue()

        elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
            res = self._rate(self._sample_store[metric][key][-2], self._sample_store[metric][key][-1])
            if expire:
                # Only keep the latest sample around for the next rate computation
                del self._sample_store[metric][key][:-1]
            return res

        elif self.is_gauge(metric) and len(self._sample_store[metric][key]) >= 1:
            return self._sample_store[metric][key][-1]

        else:
            raise UnknownValue()

    def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
        "Return the last value for that metric"
        x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
        assert isinstance(x, tuple) and len(x) == 4, x
        return x[1]

    def get_samples_with_timestamps(self, expire=True):
        "Return all values {metric: (ts, value)} for non-tagged metrics"
        values = {}
        for m in self._sample_store:
            try:
                values[m] = self.get_sample_with_timestamp(m, expire=expire)
            except Exception:
                # best-effort: skip metrics that cannot be sampled yet
                pass
        return values

    def get_samples(self, expire=True):
        "Return all values {metric: value} for non-tagged metrics"
        values = {}
        for m in self._sample_store:
            try:
                # Discard the timestamp
                values[m] = self.get_sample_with_timestamp(m, expire=expire)[1]
            except Exception:
                # best-effort: skip metrics that cannot be sampled yet
                pass
        return values

    def get_metrics(self, expire=True):
        """Get all metrics, including the ones that are tagged.
        This is the preferred method to retrieve metrics

        @return the list of samples
        @rtype [(metric_name, timestamp, value, {"dimensions": {"name1": "key1", "name2": "key2"}}), ...]
        """
        metrics = []
        for m in self._sample_store:
            try:
                for key in self._sample_store[m]:
                    dimensions_list, device_name = key
                    dimensions = dict(dimensions_list)
                    try:
                        ts, val, hostname, device_name = self.get_sample_with_timestamp(m, dimensions, device_name, expire)
                    except UnknownValue:
                        continue
                    attributes = {}
                    if dimensions_list:
                        attributes['dimensions'] = dimensions
                    if hostname:
                        attributes['host_name'] = hostname
                    if device_name:
                        attributes['device_name'] = device_name
                    metrics.append((m, int(ts), val, attributes))
            except Exception:
                # best-effort: a bad sample store entry must not break the flush
                pass
        return metrics
|
||||
|
||||
|
||||
class AgentCheck(object):

    """Base class for all modern checks; subclass this and override check()."""

    def __init__(self, name, init_config, agent_config, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agent_config: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from monagent.common.aggregator import MetricsAggregator

        self.name = name
        self.init_config = init_config
        self.agent_config = agent_config
        self.hostname = get_hostname(agent_config)
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        self.aggregator = MetricsAggregator(self.hostname,
                                            recent_point_threshold=agent_config.get('recent_point_threshold', None))

        self.events = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None  # lazily filled by get_library_info()

    def instance_count(self):
        """ Return the number of instances that are configured for this check. """
        return len(self.instances)

    def gauge(self, metric, value, dimensions=None, hostname=None, device_name=None, timestamp=None):
        """
        Record the value of a gauge, with optional dimensions, hostname and device
        name.

        :param metric: The name of the metric
        :param value: The value of the gauge
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param timestamp: (optional) The timestamp for this metric value
        """
        self.aggregator.gauge(metric, value, dimensions, hostname, device_name, timestamp)

    def increment(self, metric, value=1, dimensions=None, hostname=None, device_name=None):
        """
        Increment a counter with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to increment by
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.increment(metric, value, dimensions, hostname, device_name)

    def decrement(self, metric, value=-1, dimensions=None, hostname=None, device_name=None):
        """
        Decrement a counter with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to decrement by
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.decrement(metric, value, dimensions, hostname, device_name)

    def rate(self, metric, value, dimensions=None, hostname=None, device_name=None):
        """
        Submit a point for a metric that will be calculated as a rate on flush.
        Values will persist across each call to `check` if there is not enough
        point to generate a rate on the flush.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.rate(metric, value, dimensions, hostname, device_name)

    def histogram(self, metric, value, dimensions=None, hostname=None, device_name=None):
        """
        Sample a histogram value, with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.histogram(metric, value, dimensions, hostname, device_name)

    def set(self, metric, value, dimensions=None, hostname=None, device_name=None):
        """
        Sample a set value, with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value for the set
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.set(metric, value, dimensions, hostname, device_name)

    def event(self, event):
        """
        Save an event.

        :param event: The event payload as a dictionary. Has the following
        structure:

            {
                "timestamp": int, the epoch timestamp for the event,
                "event_type": string, the event time name,
                "api_key": string, the api key of the account to associate the event with,
                "msg_title": string, the title of the event,
                "msg_text": string, the text body of the event,
                "alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
                    Defaults to 'info'.
                "source_type_name": (optional) string, the source type name,
                "host": (optional) string, the name of the host,
                "dimensions": (optional) a dictionary of dimensions to associate with this event
            }
        """
        # Fill in the api key from the agent config when the caller omits it
        if event.get('api_key') is None:
            event['api_key'] = self.agent_config['api_key']
        self.events.append(event)

    def has_events(self):
        """
        Check whether the check has saved any events

        @return whether or not the check has saved any events
        @rtype boolean
        """
        return len(self.events) > 0

    def get_metrics(self):
        """
        Get all metrics, including the ones that are tagged.

        @return the list of samples
        @rtype list of Measurement objects from monagent.common.metrics
        """
        return self.aggregator.flush()

    def get_events(self):
        """
        Return a list of the events saved by the check, if any

        @return the list of events saved by this check
        @rtype list of event dictionaries
        """
        events = self.events
        self.events = []
        return events

    def has_warnings(self):
        """
        Check whether the instance run created any warnings
        """
        return len(self.warnings) > 0

    def warning(self, warning_message):
        """ Add a warning message that will be printed in the info page
        :param warning_message: String. Warning message to be displayed
        """
        self.warnings.append(warning_message)

    def get_library_info(self):
        """Return the (cached) library version info, or None if unsupported."""
        if self.library_versions is not None:
            return self.library_versions
        try:
            self.library_versions = self.get_library_versions()
        except NotImplementedError:
            pass
        # Previously the freshly-computed value was never returned on the
        # first call; return it (None when the check does not implement it).
        return self.library_versions

    def get_library_versions(self):
        """ Should return a string that shows which version
        of the needed libraries are used """
        raise NotImplementedError

    def get_warnings(self):
        """
        Return the list of warnings messages to be displayed in the info page
        """
        warnings = self.warnings
        self.warnings = []
        return warnings

    def run(self):
        """ Run all instances and return a list of InstanceStatus objects. """
        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                self.check(instance)
                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(i,
                                                                  check_status.STATUS_WARNING,
                                                                  warnings=self.get_warnings())
                else:
                    instance_status = check_status.InstanceStatus(i, check_status.STATUS_OK)
            except Exception as e:
                self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
                instance_status = check_status.InstanceStatus(i,
                                                              check_status.STATUS_ERROR,
                                                              error=e,
                                                              tb=traceback.format_exc())
            instance_statuses.append(instance_status)
        return instance_statuses

    def check(self, instance):
        """
        Overriden by the check class. This will be called to run the check.

        :param instance: A dict with the instance information. This will vary
        depending on your config structure.
        """
        raise NotImplementedError()

    @staticmethod
    def stop():
        """
        To be executed when the agent is being stopped to clean ressources
        """
        pass

    @classmethod
    def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
        """
        A method used for testing your check without running the agent.
        """
        import yaml
        try:
            from yaml import CLoader as Loader
        except ImportError:
            from yaml import Loader
        if path_to_yaml:
            check_name = os.path.basename(path_to_yaml).split('.')[0]
            try:
                f = open(path_to_yaml)
            except IOError:
                raise Exception('Unable to open yaml config: %s' % path_to_yaml)
            yaml_text = f.read()
            f.close()

        # NOTE: yaml.load with a full Loader executes arbitrary tags; only use
        # this on trusted, local config files (yaml.safe_load for anything else).
        config = yaml.load(yaml_text, Loader=Loader)
        check = cls(check_name, config.get('init_config') or {}, agentConfig or {})

        return check, config.get('instances', [])

    @staticmethod
    def normalize(metric, prefix=None):
        """
        Turn a metric into a well-formed metric name
        prefix.b.c

        :param metric The metric name to normalize
        :param prefix A prefix to to add to the normalized name, default None
        """
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name

    @staticmethod
    def read_config(instance, key, message=None, cast=None):
        """Read `key` from an instance config dict, raising when it is absent.

        :param instance: the instance configuration dictionary
        :param key: the key to look up
        :param message: (optional) custom error message when the key is missing
        :param cast: (optional) callable applied to the value before returning
        """
        val = instance.get(key)
        if val is None:
            message = message or 'Must provide `%s` value in instance config' % key
            raise Exception(message)

        if cast is None:
            return val
        else:
            return cast(val)
|
||||
|
||||
|
||||
def run_check(name, path=None):
    """Run every configured instance of a check once and pretty-print the results.

    Testing helper: loads '<name>.yaml' from the conf.d directory (or from
    `path` when given), builds the check, runs each instance, and prints any
    events and metrics produced.

    :param name: the name of the check to run
    :param path: (optional) explicit path to the yaml config file
    :raises Exception: when the config cannot be opened or defines no instances
    """
    from tests.common import get_check

    # Read the config file
    confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)

    try:
        f = open(confd_path)
    except IOError:
        raise Exception('Unable to open configuration at %s' % confd_path)

    config_str = f.read()
    f.close()

    # Run the check
    check, instances = get_check(name, config_str)
    if not instances:
        raise Exception('YAML configuration returned no instances.')
    for instance in instances:
        check.check(instance)
        if check.has_events():
            print("Events:\n")
            pprint(check.get_events(), indent=4)
        print("Metrics:\n")
        pprint(check.get_metrics(), indent=4)
|
||||
from check import AgentCheck
|
||||
|
|
|
@ -0,0 +1,573 @@
|
|||
"""Base class for Checks.
|
||||
|
||||
If you are writing your own checks you should subclass the AgentCheck class.
|
||||
The Check class is being deprecated so don't write new checks with it.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from pprint import pprint
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from monagent.common import check_status
|
||||
from monagent.common.config import get_confd_path
|
||||
from monagent.common.exceptions import CheckException, NaN, Infinity, UnknownValue
|
||||
from monagent.common.util import LaconicFilter, get_hostname, get_os
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# todo convert all checks to the new interface then remove this. Is the LaconicFilter on logs used elsewhere?
|
||||
#==============================================================================
|
||||
# DEPRECATED
|
||||
# ------------------------------
|
||||
# If you are writing your own check, you should inherit from AgentCheck
|
||||
# and not this class. This class will be removed in a future version
|
||||
# of the agent.
|
||||
#==============================================================================
|
||||
class Check(object):

    """(Abstract) base class for all old-style checks.

    Provides the ability to:
      * store 1 (and only 1) sample for gauges per metric/dimensions combination
      * compute rates for counters
      * only log error messages once (instead of each time they occur)

    DEPRECATED: inherit from AgentCheck for new checks.
    """

    def __init__(self, logger, agent_config=None):
        # where to store samples, indexed by metric_name
        # metric_name: {("sorted", "dimensions"): [(ts, value), (ts, value)],
        #               tuple(dimensions) are stored as a key since lists are not hashable
        #               None: [(ts, value), (ts, value)]}
        #               untagged values are indexed by None
        self.agent_config = agent_config
        self._sample_store = {}
        self._counters = {}  # metric_name: bool
        self.logger = logger
        try:
            self.logger.addFilter(LaconicFilter())
        except Exception:
            self.logger.exception("Trying to install laconic log filter and failed")

    @staticmethod
    def normalize(metric, prefix=None):
        """Turn a metric into a well-formed metric name: prefix.b.c"""
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name

    @staticmethod
    def normalize_device_name(device_name):
        """Lowercase a device name and replace spaces with underscores."""
        return device_name.strip().lower().replace(' ', '_')

    def counter(self, metric):
        """
        Treats the metric as a counter, i.e. computes its per second derivative
        ACHTUNG: Resets previous values associated with this metric.
        """
        self._counters[metric] = True
        self._sample_store[metric] = {}

    def is_counter(self, metric):
        "Is this metric a counter?"
        return metric in self._counters

    def gauge(self, metric):
        """
        Treats the metric as a gauge, i.e. keep the data as is
        ACHTUNG: Resets previous values associated with this metric.
        """
        self._sample_store[metric] = {}

    def is_metric(self, metric):
        "Has this metric been declared (as a gauge or a counter)?"
        return metric in self._sample_store

    def is_gauge(self, metric):
        "Is this metric a gauge (declared, and not a counter)?"
        return self.is_metric(metric) and not self.is_counter(metric)

    def get_metric_names(self):
        "Get all metric names"
        return self._sample_store.keys()

    def save_gauge(self, metric, value, timestamp=None, dimensions=None, hostname=None, device_name=None):
        """ Save a gauge value, declaring the metric as a gauge if needed. """
        if not self.is_gauge(metric):
            self.gauge(metric)
        self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)

    def save_sample(self, metric, value, timestamp=None, dimensions=None, hostname=None, device_name=None):
        """Save a simple sample, evict old values if needed

        :raises CheckException: if the metric is undeclared or dimensions is not a dict
        :raises NaN: if the value cannot be cast to a numeric value
        """
        if dimensions is None:
            dimensions = {}
        from common.util import cast_metric_val

        if timestamp is None:
            timestamp = time.time()
        if metric not in self._sample_store:
            raise CheckException("Saving a sample for an undefined metric: %s" % metric)
        try:
            value = cast_metric_val(value)
        except ValueError as ve:
            raise NaN(ve)

        # Sort and validate dimensions
        if dimensions is not None and not isinstance(dimensions, dict):
            raise CheckException("Dimensions must be a dictionary")

        # Data eviction rules
        key = (tuple(sorted(dimensions.items())), device_name)
        if self.is_gauge(metric):
            self._sample_store[metric][key] = ((timestamp, value, hostname, device_name), )
        elif self.is_counter(metric):
            if self._sample_store[metric].get(key) is None:
                self._sample_store[metric][key] = [(timestamp, value, hostname, device_name)]
            else:
                # Keep at most the previous sample plus the new one (enough for a rate)
                self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
                    [(timestamp, value, hostname, device_name)]
        else:
            raise CheckException("%s must be either gauge or counter, skipping sample at %s" %
                                 (metric, time.ctime(timestamp)))

        if self.is_gauge(metric):
            # store[metric][dimensions] = (ts, val) - only 1 value allowed
            assert len(self._sample_store[metric][key]) == 1, self._sample_store[metric]
        elif self.is_counter(metric):
            assert len(self._sample_store[metric][key]) in (1, 2), self._sample_store[metric]

    @classmethod
    def _rate(cls, sample1, sample2):
        "Simple rate between two (ts, value, hostname, device_name) samples"
        try:
            interval = sample2[0] - sample1[0]
            if interval == 0:
                raise Infinity()

            delta = sample2[1] - sample1[1]
            if delta < 0:
                # Counter went backwards (e.g. reset): no meaningful rate
                raise UnknownValue()

            return (sample2[0], delta / interval, sample2[2], sample2[3])
        except Infinity:
            raise
        except UnknownValue:
            raise
        except Exception as e:
            raise NaN(e)

    def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
        """Get (timestamp-epoch-style, value, hostname, device_name)

        :raises UnknownValue: if the metric is unknown or has too few samples
        """
        if dimensions is None:
            dimensions = {}

        # Get the proper dimensions
        key = (tuple(sorted(dimensions.items())), device_name)

        # Never seen this metric
        if metric not in self._sample_store:
            raise UnknownValue()

        # Not enough value to compute rate
        elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
            raise UnknownValue()

        elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
            res = self._rate(self._sample_store[metric][key][-2], self._sample_store[metric][key][-1])
            if expire:
                # Only keep the latest sample around for the next rate computation
                del self._sample_store[metric][key][:-1]
            return res

        elif self.is_gauge(metric) and len(self._sample_store[metric][key]) >= 1:
            return self._sample_store[metric][key][-1]

        else:
            raise UnknownValue()

    def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
        "Return the last value for that metric"
        x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
        assert isinstance(x, tuple) and len(x) == 4, x
        return x[1]

    def get_samples_with_timestamps(self, expire=True):
        "Return all values {metric: (ts, value)} for non-tagged metrics"
        values = {}
        for m in self._sample_store:
            try:
                values[m] = self.get_sample_with_timestamp(m, expire=expire)
            except Exception:
                # best-effort: skip metrics that cannot be sampled yet
                pass
        return values

    def get_samples(self, expire=True):
        "Return all values {metric: value} for non-tagged metrics"
        values = {}
        for m in self._sample_store:
            try:
                # Discard the timestamp
                values[m] = self.get_sample_with_timestamp(m, expire=expire)[1]
            except Exception:
                # best-effort: skip metrics that cannot be sampled yet
                pass
        return values

    def get_metrics(self, expire=True):
        """Get all metrics, including the ones that are tagged.
        This is the preferred method to retrieve metrics

        @return the list of samples
        @rtype [(metric_name, timestamp, value, {"dimensions": {"name1": "key1", "name2": "key2"}}), ...]
        """
        metrics = []
        for m in self._sample_store:
            try:
                for key in self._sample_store[m]:
                    dimensions_list, device_name = key
                    dimensions = dict(dimensions_list)
                    try:
                        ts, val, hostname, device_name = self.get_sample_with_timestamp(m, dimensions, device_name, expire)
                    except UnknownValue:
                        continue
                    attributes = {}
                    if dimensions_list:
                        attributes['dimensions'] = dimensions
                    if hostname:
                        attributes['host_name'] = hostname
                    if device_name:
                        attributes['device_name'] = device_name
                    metrics.append((m, int(ts), val, attributes))
            except Exception:
                # best-effort: a bad sample store entry must not break the flush
                pass
        return metrics
|
||||
|
||||
|
||||
class AgentCheck(object):
|
||||
|
||||
def __init__(self, name, init_config, agent_config, instances=None):
|
||||
"""
|
||||
Initialize a new check.
|
||||
|
||||
:param name: The name of the check
|
||||
:param init_config: The config for initializing the check
|
||||
:param agent_config: The global configuration for the agent
|
||||
:param instances: A list of configuration objects for each instance.
|
||||
"""
|
||||
from monagent.common.aggregator import MetricsAggregator
|
||||
|
||||
self.name = name
|
||||
self.init_config = init_config
|
||||
self.agent_config = agent_config
|
||||
self.hostname = get_hostname(agent_config)
|
||||
self.log = logging.getLogger('%s.%s' % (__name__, name))
|
||||
|
||||
self.aggregator = MetricsAggregator(self.hostname,
|
||||
recent_point_threshold=agent_config.get('recent_point_threshold', None))
|
||||
|
||||
self.events = []
|
||||
self.instances = instances or []
|
||||
self.warnings = []
|
||||
self.library_versions = None
|
||||
|
||||
def instance_count(self):
|
||||
""" Return the number of instances that are configured for this check. """
|
||||
return len(self.instances)
|
||||
|
||||
def gauge(self, metric, value, dimensions=None, hostname=None, device_name=None, timestamp=None):
|
||||
"""
|
||||
Record the value of a gauge, with optional dimensions, hostname and device
|
||||
name.
|
||||
|
||||
:param metric: The name of the metric
|
||||
:param value: The value of the gauge
|
||||
:param dimensions: (optional) A dictionary of dimensions for this metric
|
||||
:param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
|
||||
:param device_name: (optional) The device name for this metric
|
||||
:param timestamp: (optional) The timestamp for this metric value
|
||||
"""
|
||||
self.aggregator.gauge(metric, value, dimensions, hostname, device_name, timestamp)
|
||||
|
||||
def increment(self, metric, value=1, dimensions=None, hostname=None, device_name=None):
|
||||
"""
|
||||
Increment a counter with optional dimensions, hostname and device name.
|
||||
|
||||
:param metric: The name of the metric
|
||||
:param value: The value to increment by
|
||||
:param dimensions: (optional) A dictionary of dimensions for this metric
|
||||
:param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
|
||||
:param device_name: (optional) The device name for this metric
|
||||
"""
|
||||
self.aggregator.increment(metric, value, dimensions, hostname, device_name)
|
||||
|
||||
def decrement(self, metric, value=-1, dimensions=None, hostname=None, device_name=None):
|
||||
"""
|
||||
Increment a counter with optional dimensions, hostname and device name.
|
||||
|
||||
:param metric: The name of the metric
|
||||
:param value: The value to decrement by
|
||||
:param dimensions: (optional) A dictionary of dimensions for this metric
|
||||
:param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
|
||||
:param device_name: (optional) The device name for this metric
|
||||
"""
|
||||
self.aggregator.decrement(metric, value, dimensions, hostname, device_name)
|
||||
|
||||
def rate(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
||||
"""
|
||||
Submit a point for a metric that will be calculated as a rate on flush.
|
||||
Values will persist across each call to `check` if there is not enough
|
||||
point to generate a rate on the flush.
|
||||
|
||||
:param metric: The name of the metric
|
||||
:param value: The value of the rate
|
||||
:param dimensions: (optional) A dictionary of dimensions for this metric
|
||||
:param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
|
||||
:param device_name: (optional) The device name for this metric
|
||||
"""
|
||||
self.aggregator.rate(metric, value, dimensions, hostname, device_name)
|
||||
|
||||
def histogram(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
||||
"""
|
||||
Sample a histogram value, with optional dimensions, hostname and device name.
|
||||
|
||||
:param metric: The name of the metric
|
||||
:param value: The value to sample for the histogram
|
||||
:param dimensions: (optional) A dictionary of dimensions for this metric
|
||||
:param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
|
||||
:param device_name: (optional) The device name for this metric
|
||||
"""
|
||||
self.aggregator.histogram(metric, value, dimensions, hostname, device_name)
|
||||
|
||||
def set(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
||||
"""
|
||||
Sample a set value, with optional dimensions, hostname and device name.
|
||||
|
||||
:param metric: The name of the metric
|
||||
:param value: The value for the set
|
||||
:param dimensions: (optional) A dictionary of dimensions for this metric
|
||||
:param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
|
||||
:param device_name: (optional) The device name for this metric
|
||||
"""
|
||||
self.aggregator.set(metric, value, dimensions, hostname, device_name)
|
||||
|
||||
def event(self, event):
|
||||
"""
|
||||
Save an event.
|
||||
|
||||
:param event: The event payload as a dictionary. Has the following
|
||||
structure:
|
||||
|
||||
{
|
||||
"timestamp": int, the epoch timestamp for the event,
|
||||
"event_type": string, the event time name,
|
||||
"api_key": string, the api key of the account to associate the event with,
|
||||
"msg_title": string, the title of the event,
|
||||
"msg_text": string, the text body of the event,
|
||||
"alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
|
||||
Defaults to 'info'.
|
||||
"source_type_name": (optional) string, the source type name,
|
||||
"host": (optional) string, the name of the host,
|
||||
"dimensions": (optional) a dictionary of dimensions to associate with this event
|
||||
}
|
||||
"""
|
||||
if event.get('api_key') is None:
|
||||
event['api_key'] = self.agent_config['api_key']
|
||||
self.events.append(event)
|
||||
|
||||
def has_events(self):
|
||||
"""
|
||||
Check whether the check has saved any events
|
||||
|
||||
@return whether or not the check has saved any events
|
||||
@rtype boolean
|
||||
"""
|
||||
return len(self.events) > 0
|
||||
|
||||
def get_metrics(self):
|
||||
"""
|
||||
Get all metrics, including the ones that are tagged.
|
||||
|
||||
@return the list of samples
|
||||
@rtype list of Measurement objects from monagent.common.metrics
|
||||
"""
|
||||
return self.aggregator.flush()
|
||||
|
||||
def get_events(self):
|
||||
"""
|
||||
Return a list of the events saved by the check, if any
|
||||
|
||||
@return the list of events saved by this check
|
||||
@rtype list of event dictionaries
|
||||
"""
|
||||
events = self.events
|
||||
self.events = []
|
||||
return events
|
||||
|
||||
def has_warnings(self):
|
||||
"""
|
||||
Check whether the instance run created any warnings
|
||||
"""
|
||||
return len(self.warnings) > 0
|
||||
|
||||
def warning(self, warning_message):
|
||||
""" Add a warning message that will be printed in the info page
|
||||
:param warning_message: String. Warning message to be displayed
|
||||
"""
|
||||
self.warnings.append(warning_message)
|
||||
|
||||
def get_library_info(self):
|
||||
if self.library_versions is not None:
|
||||
return self.library_versions
|
||||
try:
|
||||
self.library_versions = self.get_library_versions()
|
||||
except NotImplementedError:
|
||||
pass
|
||||
|
||||
def get_library_versions(self):
|
||||
""" Should return a string that shows which version
|
||||
of the needed libraries are used """
|
||||
raise NotImplementedError
|
||||
|
||||
def get_warnings(self):
|
||||
"""
|
||||
Return the list of warnings messages to be displayed in the info page
|
||||
"""
|
||||
warnings = self.warnings
|
||||
self.warnings = []
|
||||
return warnings
|
||||
|
||||
def run(self):
|
||||
""" Run all instances. """
|
||||
instance_statuses = []
|
||||
for i, instance in enumerate(self.instances):
|
||||
try:
|
||||
self.check(instance)
|
||||
if self.has_warnings():
|
||||
instance_status = check_status.InstanceStatus(i,
|
||||
check_status.STATUS_WARNING,
|
||||
warnings=self.get_warnings())
|
||||
else:
|
||||
instance_status = check_status.InstanceStatus(i, check_status.STATUS_OK)
|
||||
except Exception, e:
|
||||
self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
|
||||
instance_status = check_status.InstanceStatus(i,
|
||||
check_status.STATUS_ERROR,
|
||||
error=e,
|
||||
tb=traceback.format_exc())
|
||||
instance_statuses.append(instance_status)
|
||||
return instance_statuses
|
||||
|
||||
def check(self, instance):
|
||||
"""
|
||||
Overriden by the check class. This will be called to run the check.
|
||||
|
||||
:param instance: A dict with the instance information. This will vary
|
||||
depending on your config structure.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@staticmethod
|
||||
def stop():
|
||||
"""
|
||||
To be executed when the agent is being stopped to clean ressources
|
||||
"""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
|
||||
"""
|
||||
A method used for testing your check without running the agent.
|
||||
"""
|
||||
import yaml
|
||||
try:
|
||||
from yaml import CLoader as Loader
|
||||
except ImportError:
|
||||
from yaml import Loader
|
||||
if path_to_yaml:
|
||||
check_name = os.path.basename(path_to_yaml).split('.')[0]
|
||||
try:
|
||||
f = open(path_to_yaml)
|
||||
except IOError:
|
||||
raise Exception('Unable to open yaml config: %s' % path_to_yaml)
|
||||
yaml_text = f.read()
|
||||
f.close()
|
||||
|
||||
config = yaml.load(yaml_text, Loader=Loader)
|
||||
check = cls(check_name, config.get('init_config') or {}, agentConfig or {})
|
||||
|
||||
return check, config.get('instances', [])
|
||||
|
||||
@staticmethod
|
||||
def normalize(metric, prefix=None):
|
||||
"""
|
||||
Turn a metric into a well-formed metric name
|
||||
prefix.b.c
|
||||
|
||||
:param metric The metric name to normalize
|
||||
:param prefix A prefix to to add to the normalized name, default None
|
||||
"""
|
||||
name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
|
||||
# Eliminate multiple _
|
||||
name = re.sub(r"__+", "_", name)
|
||||
# Don't start/end with _
|
||||
name = re.sub(r"^_", "", name)
|
||||
name = re.sub(r"_$", "", name)
|
||||
# Drop ._ and _.
|
||||
name = re.sub(r"\._", ".", name)
|
||||
name = re.sub(r"_\.", ".", name)
|
||||
|
||||
if prefix is not None:
|
||||
return prefix + "." + name
|
||||
else:
|
||||
return name
|
||||
|
||||
@staticmethod
|
||||
def read_config(instance, key, message=None, cast=None):
|
||||
val = instance.get(key)
|
||||
if val is None:
|
||||
message = message or 'Must provide `%s` value in instance config' % key
|
||||
raise Exception(message)
|
||||
|
||||
if cast is None:
|
||||
return val
|
||||
else:
|
||||
return cast(val)
|
||||
|
||||
|
||||
def run_check(name, path=None):
|
||||
from tests.common import get_check
|
||||
|
||||
# Read the config file
|
||||
confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)
|
||||
|
||||
try:
|
||||
f = open(confd_path)
|
||||
except IOError:
|
||||
raise Exception('Unable to open configuration at %s' % confd_path)
|
||||
|
||||
config_str = f.read()
|
||||
f.close()
|
||||
|
||||
# Run the check
|
||||
check, instances = get_check(name, config_str)
|
||||
if not instances:
|
||||
raise Exception('YAML configuration returned no instances.')
|
||||
for instance in instances:
|
||||
check.check(instance)
|
||||
if check.has_events():
|
||||
print "Events:\n"
|
||||
pprint(check.get_events(), indent=4)
|
||||
print "Metrics:\n"
|
||||
pprint(check.get_metrics(), indent=4)
|
|
@ -6,7 +6,7 @@ from datetime import datetime
|
|||
from itertools import groupby # >= python 2.4
|
||||
|
||||
from utils import TailFile
|
||||
from . import LaconicFilter
|
||||
from monagent.common.util import LaconicFilter
|
||||
from monagent.collector import modules
|
||||
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ import sys
|
|||
import time
|
||||
|
||||
# project
|
||||
from .. import Check
|
||||
from monagent.collector.checks.check import Check
|
||||
from monagent.common.metrics import Measurement
|
||||
from monagent.common.util import Platform
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from .. import Check
|
||||
from monagent.collector.checks.check import Check
|
||||
|
||||
try:
|
||||
import wmi
|
||||
|
|
|
@ -1,327 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Datadog
|
||||
www.datadoghq.com
|
||||
----
|
||||
Make sense of your IT Data
|
||||
|
||||
Licensed under Simplified BSD License (see LICENSE)
|
||||
(C) Boxed Ice 2010 all rights reserved
|
||||
(C) Datadog, Inc. 2010-2013 all rights reserved
|
||||
"""
|
||||
|
||||
# set up logging before importing any other components
|
||||
from monagent.common.config import initialize_logging
|
||||
initialize_logging('forwarder')
|
||||
from monagent.common.config import get_logging_config
|
||||
|
||||
import os
|
||||
os.umask(022)
|
||||
|
||||
# Standard imports
|
||||
import logging
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
import signal
|
||||
from socket import gaierror, error as socket_error
|
||||
|
||||
# Tornado
|
||||
import tornado.httpclient
|
||||
import tornado.httpserver
|
||||
import tornado.ioloop
|
||||
import tornado.web
|
||||
from tornado.escape import json_decode
|
||||
from tornado.options import define, parse_command_line, options
|
||||
|
||||
# agent import
|
||||
from api import MonAPI
|
||||
from monagent.common.check_status import ForwarderStatus
|
||||
from monagent.common.config import get_config
|
||||
from monagent.common.metrics import Measurement
|
||||
from monagent.common.util import Watchdog, get_tornado_ioloop
|
||||
from transaction import Transaction, TransactionManager
|
||||
|
||||
log = logging.getLogger('forwarder')
|
||||
log.setLevel(get_logging_config()['log_level'] or logging.INFO)
|
||||
|
||||
TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds
|
||||
WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval
|
||||
|
||||
# Maximum delay before replaying a transaction
|
||||
MAX_WAIT_FOR_REPLAY = timedelta(seconds=90)
|
||||
|
||||
# Maximum queue size in bytes (when this is reached, old messages are dropped)
|
||||
MAX_QUEUE_SIZE = 30 * 1024 * 1024 # 30MB
|
||||
|
||||
THROTTLING_DELAY = timedelta(microseconds=1000000/2) # 2 msg/second
|
||||
|
||||
|
||||
class MetricTransaction(Transaction):
|
||||
|
||||
_application = None
|
||||
_trManager = None
|
||||
_endpoints = []
|
||||
|
||||
@classmethod
|
||||
def set_application(cls, app):
|
||||
cls._application = app
|
||||
|
||||
@classmethod
|
||||
def set_tr_manager(cls, manager):
|
||||
cls._trManager = manager
|
||||
|
||||
@classmethod
|
||||
def get_tr_manager(cls):
|
||||
return cls._trManager
|
||||
|
||||
@classmethod
|
||||
def set_endpoints(cls, endpoint):
|
||||
# todo we only have one endpoint option, generalize it better
|
||||
# the immediate use case for two endpoints could be our own monitoring boxes, they could send to
|
||||
# the main api and mini-mon api
|
||||
cls._endpoints.append(endpoint)
|
||||
|
||||
def __init__(self, data, headers):
|
||||
self._data = data
|
||||
self._headers = headers
|
||||
|
||||
# Call after data has been set (size is computed in Transaction's init)
|
||||
Transaction.__init__(self)
|
||||
|
||||
# Insert the transaction in the Manager
|
||||
self._trManager.append(self)
|
||||
log.debug("Created transaction %d" % self.get_id())
|
||||
self._trManager.flush()
|
||||
|
||||
def __sizeof__(self):
|
||||
return sys.getsizeof(self._data)
|
||||
|
||||
def flush(self):
|
||||
try:
|
||||
for endpoint in self._endpoints:
|
||||
endpoint.post_metrics(self._data)
|
||||
except Exception:
|
||||
log.exception('Error flushing metrics to remote endpoints')
|
||||
self._trManager.tr_error(self)
|
||||
else:
|
||||
self._trManager.tr_success(self)
|
||||
self._trManager.flush_next()
|
||||
|
||||
|
||||
class StatusHandler(tornado.web.RequestHandler):
|
||||
|
||||
def get(self):
|
||||
threshold = int(self.get_argument('threshold', -1))
|
||||
|
||||
m = MetricTransaction.get_tr_manager()
|
||||
|
||||
self.write("<table><tr><td>Id</td><td>Size</td><td>Error count</td><td>Next flush</td></tr>")
|
||||
transactions = m.get_transactions()
|
||||
for tr in transactions:
|
||||
self.write("<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" %
|
||||
(tr.get_id(), tr.get_size(), tr.get_error_count(), tr.get_next_flush()))
|
||||
self.write("</table>")
|
||||
|
||||
if threshold >= 0:
|
||||
if len(transactions) > threshold:
|
||||
self.set_status(503)
|
||||
|
||||
|
||||
class AgentInputHandler(tornado.web.RequestHandler):
|
||||
|
||||
def post(self):
|
||||
"""Read the message and forward it to the intake
|
||||
The message is expected to follow the format:
|
||||
|
||||
"""
|
||||
# read the message it should be a list of monagent.common.metrics.Measurements expressed as a dict
|
||||
msg = tornado.escape.json_decode(self.request.body)
|
||||
try:
|
||||
log.debug(msg)
|
||||
measurements = [Measurement(**m) for m in msg]
|
||||
except Exception:
|
||||
log.exception('Error parsing body of Agent Input')
|
||||
raise tornado.web.HTTPError(500)
|
||||
|
||||
headers = self.request.headers
|
||||
|
||||
if len(measurements) > 0:
|
||||
# Setup a transaction for this message
|
||||
tr = MetricTransaction(measurements, headers)
|
||||
else:
|
||||
raise tornado.web.HTTPError(500)
|
||||
|
||||
self.write("Transaction: %s" % tr.get_id())
|
||||
|
||||
|
||||
class Forwarder(tornado.web.Application):
|
||||
|
||||
def __init__(self, port, agent_config, watchdog=True, skip_ssl_validation=False, use_simple_http_client=False):
|
||||
self._port = int(port)
|
||||
self._agentConfig = agent_config
|
||||
self._metrics = {}
|
||||
MetricTransaction.set_application(self)
|
||||
MetricTransaction.set_endpoints(MonAPI(agent_config['Api']))
|
||||
self._tr_manager = TransactionManager(MAX_WAIT_FOR_REPLAY, MAX_QUEUE_SIZE, THROTTLING_DELAY)
|
||||
MetricTransaction.set_tr_manager(self._tr_manager)
|
||||
|
||||
self._watchdog = None
|
||||
self.skip_ssl_validation = skip_ssl_validation or agent_config.get('skip_ssl_validation', False)
|
||||
self.use_simple_http_client = use_simple_http_client
|
||||
if self.skip_ssl_validation:
|
||||
log.info("Skipping SSL hostname validation, useful when using a transparent proxy")
|
||||
|
||||
if watchdog:
|
||||
watchdog_timeout = TRANSACTION_FLUSH_INTERVAL * WATCHDOG_INTERVAL_MULTIPLIER
|
||||
self._watchdog = Watchdog(watchdog_timeout, max_mem_mb=agent_config.get('limit_memory_consumption', None))
|
||||
|
||||
def _post_metrics(self):
|
||||
|
||||
if len(self._metrics) > 0:
|
||||
MetricTransaction(self._metrics, headers={'Content-Type': 'application/json'})
|
||||
self._metrics = {}
|
||||
|
||||
# todo why is the tornado logging method overridden? Perhaps ditch this.
|
||||
def log_request(self, handler):
|
||||
""" Override the tornado logging method.
|
||||
If everything goes well, log level is DEBUG.
|
||||
Otherwise it's WARNING or ERROR depending on the response code. """
|
||||
if handler.get_status() < 400:
|
||||
log_method = log.debug
|
||||
elif handler.get_status() < 500:
|
||||
log_method = log.warning
|
||||
else:
|
||||
log_method = log.error
|
||||
request_time = 1000.0 * handler.request.request_time()
|
||||
log_method("%d %s %.2fms", handler.get_status(),
|
||||
handler._request_summary(), request_time)
|
||||
|
||||
def run(self):
|
||||
handlers = [
|
||||
(r"/intake/?", AgentInputHandler),
|
||||
(r"/api/v1/series/?", AgentInputHandler),
|
||||
(r"/status/?", StatusHandler),
|
||||
]
|
||||
|
||||
settings = dict(
|
||||
cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
|
||||
xsrf_cookies=False,
|
||||
debug=False,
|
||||
log_function=self.log_request
|
||||
)
|
||||
|
||||
non_local_traffic = self._agentConfig.get("non_local_traffic", False)
|
||||
|
||||
tornado.web.Application.__init__(self, handlers, **settings)
|
||||
http_server = tornado.httpserver.HTTPServer(self)
|
||||
|
||||
try:
|
||||
# non_local_traffic must be == True to match, not just some non-false value
|
||||
if non_local_traffic is True:
|
||||
http_server.listen(self._port)
|
||||
else:
|
||||
# localhost in lieu of 127.0.0.1 to support IPv6
|
||||
try:
|
||||
http_server.listen(self._port, address="localhost")
|
||||
except gaierror:
|
||||
log.warning("localhost seems undefined in your host file, using 127.0.0.1 instead")
|
||||
http_server.listen(self._port, address="127.0.0.1")
|
||||
except socket_error, e:
|
||||
if "Errno 99" in str(e):
|
||||
log.warning("IPv6 doesn't seem to be fully supported. Falling back to IPv4")
|
||||
http_server.listen(self._port, address="127.0.0.1")
|
||||
else:
|
||||
raise
|
||||
except socket_error, e:
|
||||
log.exception("Socket error %s. Is another application listening on the same port ? Exiting", e)
|
||||
sys.exit(1)
|
||||
except Exception:
|
||||
log.exception("Uncaught exception. Forwarder is exiting.")
|
||||
sys.exit(1)
|
||||
|
||||
log.info("Listening on port %d" % self._port)
|
||||
|
||||
# Register callbacks
|
||||
self.mloop = get_tornado_ioloop()
|
||||
|
||||
logging.getLogger().setLevel(get_logging_config()['log_level'] or logging.INFO)
|
||||
|
||||
def flush_trs():
|
||||
if self._watchdog:
|
||||
self._watchdog.reset()
|
||||
self._post_metrics()
|
||||
self._tr_manager.flush()
|
||||
|
||||
tr_sched = tornado.ioloop.PeriodicCallback(flush_trs, TRANSACTION_FLUSH_INTERVAL, io_loop=self.mloop)
|
||||
|
||||
# Start everything
|
||||
if self._watchdog:
|
||||
self._watchdog.reset()
|
||||
tr_sched.start()
|
||||
|
||||
self.mloop.start()
|
||||
log.info("Stopped")
|
||||
|
||||
def stop(self):
|
||||
self.mloop.stop()
|
||||
|
||||
|
||||
def init_forwarder(skip_ssl_validation=False, use_simple_http_client=False):
|
||||
agent_config = get_config(parse_args=False)
|
||||
|
||||
port = agent_config.get('listen_port', 17123)
|
||||
if port is None:
|
||||
port = 17123
|
||||
else:
|
||||
port = int(port)
|
||||
|
||||
app = Forwarder(port, agent_config, skip_ssl_validation=skip_ssl_validation,
|
||||
use_simple_http_client=use_simple_http_client)
|
||||
|
||||
def sigterm_handler(signum, frame):
|
||||
log.info("caught sigterm. stopping")
|
||||
app.stop()
|
||||
|
||||
signal.signal(signal.SIGTERM, sigterm_handler)
|
||||
signal.signal(signal.SIGINT, sigterm_handler)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def main():
|
||||
define("sslcheck", default=1, help="Verify SSL hostname, on by default")
|
||||
define("use_simple_http_client", default=0, help="Use Tornado SimpleHTTPClient instead of CurlAsyncHTTPClient")
|
||||
args = parse_command_line()
|
||||
skip_ssl_validation = False
|
||||
use_simple_http_client = False
|
||||
|
||||
if unicode(options.sslcheck) == u"0":
|
||||
skip_ssl_validation = True
|
||||
|
||||
if unicode(options.use_simple_http_client) == u"1":
|
||||
use_simple_http_client = True
|
||||
|
||||
# If we don't have any arguments, run the server.
|
||||
if not args:
|
||||
app = init_forwarder(skip_ssl_validation, use_simple_http_client=use_simple_http_client)
|
||||
try:
|
||||
app.run()
|
||||
finally:
|
||||
ForwarderStatus.remove_latest_status()
|
||||
|
||||
else:
|
||||
usage = "%s [help|info]. Run with no commands to start the server" % (sys.argv[0])
|
||||
command = args[0]
|
||||
if command == 'info':
|
||||
logging.getLogger().setLevel(logging.ERROR)
|
||||
return ForwarderStatus.print_latest_status()
|
||||
elif command == 'help':
|
||||
print usage
|
||||
else:
|
||||
print "Unknown command: %s" % command
|
||||
print usage
|
||||
return -1
|
||||
return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
|
@ -1,108 +1 @@
|
|||
import json
|
||||
import logging
|
||||
|
||||
from threading import Timer
|
||||
from monagent.common.keystone import Keystone
|
||||
from monagent.common.util import get_hostname
|
||||
from monclient import client
|
||||
import monclient.exc as exc
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MonAPI(object):
|
||||
"""Sends measurements to MonAPI
|
||||
Any errors should raise an exception so the transaction calling
|
||||
this is not committed
|
||||
"""
|
||||
def __init__(self, config):
|
||||
"""
|
||||
Initialize Mon api client connection.
|
||||
"""
|
||||
self.config = config
|
||||
self.url = config['url']
|
||||
self.api_version = '2_0'
|
||||
self.default_dimensions = config['dimensions']
|
||||
self.token_expiration = 1438
|
||||
# Verify the hostname is set as a dimension
|
||||
if not 'hostname' in self.default_dimensions:
|
||||
self.default_dimensions['hostname'] = get_hostname()
|
||||
|
||||
log.debug("Getting token from Keystone")
|
||||
self.keystone_url = config['keystone_url']
|
||||
self.username = config['username']
|
||||
self.password = config['password']
|
||||
self.project_name = config['project_name']
|
||||
|
||||
self.keystone = Keystone(self.keystone_url,
|
||||
self.username,
|
||||
self.password,
|
||||
self.project_name)
|
||||
self.mon_client = None
|
||||
|
||||
def _post(self, measurements):
|
||||
"""Does the actual http post
|
||||
measurements is a list of Measurement
|
||||
"""
|
||||
data = [m.__dict__ for m in measurements]
|
||||
kwargs = {
|
||||
'jsonbody': data
|
||||
}
|
||||
try:
|
||||
if not self.mon_client:
|
||||
# construct the mon client
|
||||
self.mon_client = self.get_client()
|
||||
|
||||
done = False
|
||||
while not done:
|
||||
response = self.mon_client.metrics.create(**kwargs)
|
||||
if 200 <= response.status_code <= 299:
|
||||
# Good status from web service
|
||||
log.debug("Message sent successfully: {0}"
|
||||
.format(str(data)))
|
||||
elif 400 <= response.status_code <= 499:
|
||||
# Good status from web service but some type of issue
|
||||
# with the data
|
||||
if response.status_code == 401:
|
||||
# Get a new token/client and retry
|
||||
self.mon_client = self.get_client()
|
||||
continue
|
||||
else:
|
||||
error_msg = "Successful web service call but there" + \
|
||||
" were issues (Status: {0}, Status Message: " + \
|
||||
"{1}, Message Content: {1})"
|
||||
log.error(error_msg.format(response.status_code,
|
||||
response.reason, response.text))
|
||||
response.raise_for_status()
|
||||
else: # Not a good status
|
||||
response.raise_for_status()
|
||||
done = True
|
||||
except exc.HTTPException as he:
|
||||
log.error("Error sending message to mon-api: {0}"
|
||||
.format(str(he.message)))
|
||||
|
||||
def post_metrics(self, measurements):
|
||||
"""post_metrics
|
||||
given [Measurement, ...], format the request and post to
|
||||
the monitoring api
|
||||
"""
|
||||
# Add default dimensions
|
||||
for measurement in measurements:
|
||||
for dimension in self.default_dimensions.keys():
|
||||
if not measurement.dimensions.has_key(dimension):
|
||||
measurement.dimensions.update({dimension: self.default_dimensions[dimension]})
|
||||
|
||||
self._post(measurements)
|
||||
|
||||
def get_client(self):
|
||||
"""get_client
|
||||
get a new mon-client object
|
||||
"""
|
||||
token = self.keystone.refresh_token()
|
||||
# Re-create the client. This is temporary until
|
||||
# the client is updated to be able to reset the
|
||||
# token.
|
||||
kwargs = {
|
||||
'token': token
|
||||
}
|
||||
return client.Client(self.api_version, self.url, **kwargs)
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
import logging
|
||||
|
||||
from monclient import exc as exc, client
|
||||
from monagent.common.keystone import Keystone
|
||||
from monagent.common.util import get_hostname
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MonAPI(object):
|
||||
"""Sends measurements to MonAPI
|
||||
Any errors should raise an exception so the transaction calling
|
||||
this is not committed
|
||||
"""
|
||||
def __init__(self, config):
|
||||
"""
|
||||
Initialize Mon api client connection.
|
||||
"""
|
||||
self.config = config
|
||||
self.url = config['url']
|
||||
self.api_version = '2_0'
|
||||
self.default_dimensions = config['dimensions']
|
||||
self.token_expiration = 1438
|
||||
# Verify the hostname is set as a dimension
|
||||
if not 'hostname' in self.default_dimensions:
|
||||
self.default_dimensions['hostname'] = get_hostname()
|
||||
|
||||
log.debug("Getting token from Keystone")
|
||||
self.keystone_url = config['keystone_url']
|
||||
self.username = config['username']
|
||||
self.password = config['password']
|
||||
self.project_name = config['project_name']
|
||||
|
||||
self.keystone = Keystone(self.keystone_url,
|
||||
self.username,
|
||||
self.password,
|
||||
self.project_name)
|
||||
self.mon_client = None
|
||||
|
||||
def _post(self, measurements):
|
||||
"""Does the actual http post
|
||||
measurements is a list of Measurement
|
||||
"""
|
||||
data = [m.__dict__ for m in measurements]
|
||||
kwargs = {
|
||||
'jsonbody': data
|
||||
}
|
||||
try:
|
||||
if not self.mon_client:
|
||||
# construct the mon client
|
||||
self.mon_client = self.get_client()
|
||||
|
||||
done = False
|
||||
while not done:
|
||||
response = self.mon_client.metrics.create(**kwargs)
|
||||
if 200 <= response.status_code <= 299:
|
||||
# Good status from web service
|
||||
log.debug("Message sent successfully: {0}"
|
||||
.format(str(data)))
|
||||
elif 400 <= response.status_code <= 499:
|
||||
# Good status from web service but some type of issue
|
||||
# with the data
|
||||
if response.status_code == 401:
|
||||
# Get a new token/client and retry
|
||||
self.mon_client = self.get_client()
|
||||
continue
|
||||
else:
|
||||
error_msg = "Successful web service call but there" + \
|
||||
" were issues (Status: {0}, Status Message: " + \
|
||||
"{1}, Message Content: {1})"
|
||||
log.error(error_msg.format(response.status_code,
|
||||
response.reason, response.text))
|
||||
response.raise_for_status()
|
||||
else: # Not a good status
|
||||
response.raise_for_status()
|
||||
done = True
|
||||
except exc.HTTPException as he:
|
||||
log.error("Error sending message to mon-api: {0}"
|
||||
.format(str(he.message)))
|
||||
|
||||
def post_metrics(self, measurements):
|
||||
"""post_metrics
|
||||
given [Measurement, ...], format the request and post to
|
||||
the monitoring api
|
||||
"""
|
||||
# Add default dimensions
|
||||
for measurement in measurements:
|
||||
for dimension in self.default_dimensions.keys():
|
||||
if not measurement.dimensions.has_key(dimension):
|
||||
measurement.dimensions.update({dimension: self.default_dimensions[dimension]})
|
||||
|
||||
self._post(measurements)
|
||||
|
||||
def get_client(self):
|
||||
"""get_client
|
||||
get a new mon-client object
|
||||
"""
|
||||
token = self.keystone.refresh_token()
|
||||
# Re-create the client. This is temporary until
|
||||
# the client is updated to be able to reset the
|
||||
# token.
|
||||
kwargs = {
|
||||
'token': token
|
||||
}
|
||||
return client.Client(self.api_version, self.url, **kwargs)
|
|
@ -0,0 +1,328 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Datadog
|
||||
www.datadoghq.com
|
||||
----
|
||||
Make sense of your IT Data
|
||||
|
||||
Licensed under Simplified BSD License (see LICENSE)
|
||||
(C) Boxed Ice 2010 all rights reserved
|
||||
(C) Datadog, Inc. 2010-2013 all rights reserved
|
||||
"""
|
||||
|
||||
# set up logging before importing any other components
|
||||
from monagent.common.config import initialize_logging
|
||||
from monagent.forwarder.api.mon import MonAPI
|
||||
|
||||
initialize_logging('forwarder')
|
||||
from monagent.common.config import get_logging_config
|
||||
|
||||
import os
|
||||
os.umask(022)
|
||||
|
||||
# Standard imports
|
||||
import logging
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
import signal
|
||||
from socket import gaierror, error as socket_error
|
||||
|
||||
# Tornado
|
||||
import tornado.httpclient
|
||||
import tornado.httpserver
|
||||
import tornado.ioloop
|
||||
import tornado.web
|
||||
from tornado.escape import json_decode
|
||||
from tornado.options import define, parse_command_line, options
|
||||
|
||||
# agent import
|
||||
from monagent.common.check_status import ForwarderStatus
|
||||
from monagent.common.config import get_config
|
||||
from monagent.common.metrics import Measurement
|
||||
from monagent.common.util import Watchdog, get_tornado_ioloop
|
||||
from transaction import Transaction, TransactionManager
|
||||
|
||||
log = logging.getLogger('forwarder')
|
||||
log.setLevel(get_logging_config()['log_level'] or logging.INFO)
|
||||
|
||||
TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds
|
||||
WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval
|
||||
|
||||
# Maximum delay before replaying a transaction
|
||||
MAX_WAIT_FOR_REPLAY = timedelta(seconds=90)
|
||||
|
||||
# Maximum queue size in bytes (when this is reached, old messages are dropped)
|
||||
MAX_QUEUE_SIZE = 30 * 1024 * 1024 # 30MB
|
||||
|
||||
THROTTLING_DELAY = timedelta(microseconds=1000000/2) # 2 msg/second
|
||||
|
||||
|
||||
class MetricTransaction(Transaction):
    """One batch of measurements queued for delivery to the API endpoint(s).

    Class-level state (application, transaction manager, endpoints) is shared
    by every transaction and must be configured through the set_* classmethods
    before any instance is created.
    """

    # Shared across all transactions; populated via the classmethods below.
    _application = None
    _trManager = None
    _endpoints = []

    @classmethod
    def set_application(cls, app):
        # The tornado application this transaction belongs to.
        cls._application = app

    @classmethod
    def set_tr_manager(cls, manager):
        # Manager that queues, retries and throttles transactions.
        cls._trManager = manager

    @classmethod
    def get_tr_manager(cls):
        return cls._trManager

    @classmethod
    def set_endpoints(cls, endpoint):
        # todo we only have one endpoint option, generalize it better
        # the immediate use case for two endpoints could be our own monitoring boxes, they could send to
        # the main api and mini-mon api
        cls._endpoints.append(endpoint)

    def __init__(self, data, headers):
        # data: the measurement payload; headers: HTTP headers from the poster.
        self._data = data
        self._headers = headers

        # Call after data has been set (size is computed in Transaction's init)
        Transaction.__init__(self)

        # Insert the transaction in the Manager
        self._trManager.append(self)
        log.debug("Created transaction %d" % self.get_id())
        self._trManager.flush()

    def __sizeof__(self):
        # Size accounting is based on the payload only, not the headers.
        return sys.getsizeof(self._data)

    def flush(self):
        """Post the payload to every endpoint and report the outcome
        (success or error) to the transaction manager."""
        try:
            for endpoint in self._endpoints:
                endpoint.post_metrics(self._data)
        except Exception:
            log.exception('Error flushing metrics to remote endpoints')
            self._trManager.tr_error(self)
        else:
            self._trManager.tr_success(self)
        # Always give the next queued transaction a chance to go out.
        self._trManager.flush_next()
|
||||
|
||||
|
||||
class StatusHandler(tornado.web.RequestHandler):
    """Render an HTML table of the pending transactions.

    With ?threshold=N the handler answers 503 when more than N transactions
    are queued, which makes it usable as a health-check endpoint.
    """

    def get(self):
        threshold = int(self.get_argument('threshold', -1))

        manager = MetricTransaction.get_tr_manager()
        transactions = manager.get_transactions()

        self.write("<table><tr><td>Id</td><td>Size</td><td>Error count</td><td>Next flush</td></tr>")
        for txn in transactions:
            self.write("<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" %
                       (txn.get_id(), txn.get_size(), txn.get_error_count(), txn.get_next_flush()))
        self.write("</table>")

        # Negative threshold (the default) disables the overload signal.
        if 0 <= threshold < len(transactions):
            self.set_status(503)
|
||||
|
||||
|
||||
class AgentInputHandler(tornado.web.RequestHandler):
    """Accept measurement batches posted by local agents."""

    def post(self):
        """Read the message and forward it to the intake

        The message is expected to follow the format:

        """
        # read the message it should be a list of monagent.common.metrics.Measurements expressed as a dict
        msg = tornado.escape.json_decode(self.request.body)
        try:
            log.debug(msg)
            measurements = [Measurement(**m) for m in msg]
        except Exception:
            log.exception('Error parsing body of Agent Input')
            raise tornado.web.HTTPError(500)

        # An empty batch is treated as a client error.
        if not measurements:
            raise tornado.web.HTTPError(500)

        # Queue the batch as a transaction for delivery.
        tr = MetricTransaction(measurements, self.request.headers)
        self.write("Transaction: %s" % tr.get_id())
|
||||
|
||||
|
||||
class Forwarder(tornado.web.Application):
    """Tornado application that accepts measurements from local agents and
    forwards them to the configured API endpoint(s) via transactions."""

    def __init__(self, port, agent_config, watchdog=True, skip_ssl_validation=False, use_simple_http_client=False):
        self._port = int(port)
        self._agentConfig = agent_config
        self._metrics = {}
        # Wire up the class-level state shared by all MetricTransactions.
        MetricTransaction.set_application(self)
        MetricTransaction.set_endpoints(MonAPI(agent_config['Api']))
        self._tr_manager = TransactionManager(MAX_WAIT_FOR_REPLAY, MAX_QUEUE_SIZE, THROTTLING_DELAY)
        MetricTransaction.set_tr_manager(self._tr_manager)

        self._watchdog = None
        # A config-file setting can also disable SSL validation.
        self.skip_ssl_validation = skip_ssl_validation or agent_config.get('skip_ssl_validation', False)
        self.use_simple_http_client = use_simple_http_client
        if self.skip_ssl_validation:
            log.info("Skipping SSL hostname validation, useful when using a transparent proxy")

        if watchdog:
            watchdog_timeout = TRANSACTION_FLUSH_INTERVAL * WATCHDOG_INTERVAL_MULTIPLIER
            self._watchdog = Watchdog(watchdog_timeout, max_mem_mb=agent_config.get('limit_memory_consumption', None))

    def _post_metrics(self):
        # Flush internally accumulated metrics as one transaction, then reset.
        if len(self._metrics) > 0:
            MetricTransaction(self._metrics, headers={'Content-Type': 'application/json'})
            self._metrics = {}

    # todo why is the tornado logging method overridden? Perhaps ditch this.
    def log_request(self, handler):
        """ Override the tornado logging method.
        If everything goes well, log level is DEBUG.
        Otherwise it's WARNING or ERROR depending on the response code. """
        if handler.get_status() < 400:
            log_method = log.debug
        elif handler.get_status() < 500:
            log_method = log.warning
        else:
            log_method = log.error
        request_time = 1000.0 * handler.request.request_time()
        log_method("%d %s %.2fms", handler.get_status(),
                   handler._request_summary(), request_time)

    def run(self):
        """Bind the HTTP server, schedule the periodic flush callback and
        start the IO loop. Blocks until stop() is called; exits the process
        on bind failure."""
        handlers = [
            (r"/intake/?", AgentInputHandler),
            (r"/api/v1/series/?", AgentInputHandler),
            (r"/status/?", StatusHandler),
        ]

        settings = dict(
            cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
            xsrf_cookies=False,
            debug=False,
            log_function=self.log_request
        )

        non_local_traffic = self._agentConfig.get("non_local_traffic", False)

        tornado.web.Application.__init__(self, handlers, **settings)
        http_server = tornado.httpserver.HTTPServer(self)

        try:
            # non_local_traffic must be == True to match, not just some non-false value
            if non_local_traffic is True:
                http_server.listen(self._port)
            else:
                # localhost in lieu of 127.0.0.1 to support IPv6
                try:
                    http_server.listen(self._port, address="localhost")
                except gaierror:
                    log.warning("localhost seems undefined in your host file, using 127.0.0.1 instead")
                    http_server.listen(self._port, address="127.0.0.1")
                except socket_error, e:
                    if "Errno 99" in str(e):
                        log.warning("IPv6 doesn't seem to be fully supported. Falling back to IPv4")
                        http_server.listen(self._port, address="127.0.0.1")
                    else:
                        raise
        except socket_error, e:
            log.exception("Socket error %s. Is another application listening on the same port ? Exiting", e)
            sys.exit(1)
        except Exception:
            log.exception("Uncaught exception. Forwarder is exiting.")
            sys.exit(1)

        log.info("Listening on port %d" % self._port)

        # Register callbacks
        self.mloop = get_tornado_ioloop()

        logging.getLogger().setLevel(get_logging_config()['log_level'] or logging.INFO)

        def flush_trs():
            # Periodic heartbeat: keep the watchdog alive and push queued data.
            if self._watchdog:
                self._watchdog.reset()
            self._post_metrics()
            self._tr_manager.flush()

        tr_sched = tornado.ioloop.PeriodicCallback(flush_trs, TRANSACTION_FLUSH_INTERVAL, io_loop=self.mloop)

        # Start everything
        if self._watchdog:
            self._watchdog.reset()
        tr_sched.start()

        self.mloop.start()
        log.info("Stopped")

    def stop(self):
        # Stop the IO loop; run() then returns.
        self.mloop.stop()
|
||||
|
||||
|
||||
def init_forwarder(skip_ssl_validation=False, use_simple_http_client=False):
    """Build the Forwarder application and install SIGTERM/SIGINT handlers.

    Returns the (not yet running) Forwarder app.
    """
    agent_config = get_config(parse_args=False)

    # An explicit None in the config still falls back to the default port.
    port = agent_config.get('listen_port', 17123)
    port = 17123 if port is None else int(port)

    app = Forwarder(port, agent_config, skip_ssl_validation=skip_ssl_validation,
                    use_simple_http_client=use_simple_http_client)

    def sigterm_handler(signum, frame):
        # Both SIGTERM and SIGINT shut the IO loop down cleanly.
        log.info("caught sigterm. stopping")
        app.stop()

    signal.signal(signal.SIGTERM, sigterm_handler)
    signal.signal(signal.SIGINT, sigterm_handler)

    return app
|
||||
|
||||
|
||||
def main():
|
||||
define("sslcheck", default=1, help="Verify SSL hostname, on by default")
|
||||
define("use_simple_http_client", default=0, help="Use Tornado SimpleHTTPClient instead of CurlAsyncHTTPClient")
|
||||
args = parse_command_line()
|
||||
skip_ssl_validation = False
|
||||
use_simple_http_client = False
|
||||
|
||||
if unicode(options.sslcheck) == u"0":
|
||||
skip_ssl_validation = True
|
||||
|
||||
if unicode(options.use_simple_http_client) == u"1":
|
||||
use_simple_http_client = True
|
||||
|
||||
# If we don't have any arguments, run the server.
|
||||
if not args:
|
||||
app = init_forwarder(skip_ssl_validation, use_simple_http_client=use_simple_http_client)
|
||||
try:
|
||||
app.run()
|
||||
finally:
|
||||
ForwarderStatus.remove_latest_status()
|
||||
|
||||
else:
|
||||
usage = "%s [help|info]. Run with no commands to start the server" % (sys.argv[0])
|
||||
command = args[0]
|
||||
if command == 'info':
|
||||
logging.getLogger().setLevel(logging.ERROR)
|
||||
return ForwarderStatus.print_latest_status()
|
||||
elif command == 'help':
|
||||
print usage
|
||||
else:
|
||||
print "Unknown command: %s" % command
|
||||
print usage
|
||||
return -1
|
||||
return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
|
@ -1,154 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
A Python Statsd implementation with dimensions added
|
||||
"""
|
||||
|
||||
# set up logging before importing any other components
|
||||
from monagent.common.config import initialize_logging
|
||||
from monagent.monstatsd.reporter import Reporter
|
||||
from monagent.monstatsd.udp import Server
|
||||
|
||||
initialize_logging('monstatsd')
|
||||
|
||||
import os
|
||||
os.umask(022)
|
||||
|
||||
# stdlib
|
||||
import logging
|
||||
import optparse
|
||||
import signal
|
||||
import sys
|
||||
|
||||
# project
|
||||
from monagent.common.aggregator import MetricsBucketAggregator
|
||||
from monagent.common.check_status import MonstatsdStatus
|
||||
from monagent.common.config import get_config
|
||||
from monagent.common.daemon import Daemon, AgentSupervisor
|
||||
from monagent.common.util import PidFile, get_hostname
|
||||
|
||||
log = logging.getLogger('monstatsd')
|
||||
|
||||
|
||||
class Monstatsd(Daemon):
    """ This class is the monstatsd daemon. """

    def __init__(self, pid_file, server, reporter, autorestart):
        # pid_file: path to the daemon pid file.
        # server: UDP statsd server; its start() blocks in run().
        # reporter: thread that periodically ships aggregated metrics.
        Daemon.__init__(self, pid_file, autorestart=autorestart)
        self.server = server
        self.reporter = reporter

    def _handle_sigterm(self, signum, frame):
        # Stopping the server unblocks run(), which then shuts down the reporter.
        log.debug("Caught sigterm. Stopping run loop.")
        self.server.stop()

    def run(self):
        """Run the server until it stops, then shut down the reporter.

        May call sys.exit() when autorestart is enabled.
        """
        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Start the reporting thread before accepting data
        self.reporter.start()

        try:
            try:
                self.server.start()
            except Exception, e:
                log.exception('Error starting server')
                raise e
        finally:
            # The server will block until it's done. Once we're here, shutdown
            # the reporting thread.
            self.reporter.stop()
            self.reporter.join()
            log.info("Monstatsd is stopped")
            # Restart if asked to restart
            if self.autorestart:
                sys.exit(AgentSupervisor.RESTART_EXIT_STATUS)

    def info(self):
        # Quiet the root logger so only the status report is printed.
        logging.getLogger().setLevel(logging.ERROR)
        return MonstatsdStatus.print_latest_status()
|
||||
|
||||
|
||||
def init_monstatsd(config_path=None, use_watchdog=False):
    """Configure the server and the reporting thread.
    """
    cfg = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration monstatsd")

    port = cfg['monstatsd_port']
    interval = int(cfg['monstatsd_interval'])
    aggregator_interval = int(cfg['monstatsd_agregator_bucket_size'])
    non_local_traffic = cfg['non_local_traffic']
    forward_to_host = cfg.get('statsd_forward_host')
    forward_to_port = cfg.get('statsd_forward_port')
    event_chunk_size = cfg.get('event_chunk_size')

    target = cfg['forwarder_url']
    hostname = get_hostname(cfg)

    # The aggregator is the shared state between the server thread (which
    # feeds it samples) and the reporter thread (which drains it).
    assert interval > 0
    aggregator = MetricsBucketAggregator(
        hostname, aggregator_interval,
        recent_point_threshold=cfg.get('recent_point_threshold', None))

    # Reporting thread (started by the caller).
    reporter = Reporter(interval, aggregator, target, use_watchdog, event_chunk_size)

    # IPv4 server: bind to loopback unless non_local_traffic asks for all
    # addresses.
    server_host = '' if non_local_traffic else 'localhost'
    server = Server(aggregator, server_host, port,
                    forward_to_host=forward_to_host,
                    forward_to_port=forward_to_port)

    return reporter, server, cfg
|
||||
|
||||
|
||||
def main(config_path=None):
    """ The main entry point for the unix version of monstatsd. """
    parser = optparse.OptionParser("%prog [start|stop|restart|status]")
    opts, args = parser.parse_args()

    reporter, server, cnf = init_monstatsd(config_path, use_watchdog=True)
    pid_file = PidFile('monstatsd')
    daemon = Monstatsd(pid_file.get_path(), server, reporter,
                       cnf.get('autorestart', False))

    # No arguments: run the server in the foreground.
    # todo does this need to be a daemon even when it basically always runs in the foreground, if not
    # restructure and get rid of the silly init_function
    if not args:
        daemon.run()
        return 0

    # Otherwise dispatch the daemon command.
    command = args[0]
    actions = {
        'start': daemon.start,
        'stop': daemon.stop,
        'restart': daemon.restart,
        'status': daemon.status,
    }
    if command in actions:
        actions[command]()
    elif command == 'info':
        return daemon.info()
    else:
        sys.stderr.write("Unknown command: %s\n\n" % command)
        parser.print_help()
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
|
@ -0,0 +1,154 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
A Python Statsd implementation with dimensions added
|
||||
"""
|
||||
|
||||
# set up logging before importing any other components
|
||||
from monagent.common.config import initialize_logging
|
||||
from monagent.monstatsd.reporter import Reporter
|
||||
from monagent.monstatsd.udp import Server
|
||||
|
||||
initialize_logging('monstatsd')
|
||||
|
||||
import os
|
||||
os.umask(022)
|
||||
|
||||
# stdlib
|
||||
import logging
|
||||
import optparse
|
||||
import signal
|
||||
import sys
|
||||
|
||||
# project
|
||||
from monagent.common.aggregator import MetricsBucketAggregator
|
||||
from monagent.common.check_status import MonstatsdStatus
|
||||
from monagent.common.config import get_config
|
||||
from monagent.common.daemon import Daemon, AgentSupervisor
|
||||
from monagent.common.util import PidFile, get_hostname
|
||||
|
||||
log = logging.getLogger('monstatsd')
|
||||
|
||||
|
||||
class Monstatsd(Daemon):
    """ This class is the monstatsd daemon. """

    def __init__(self, pid_file, server, reporter, autorestart):
        # pid_file: path to the daemon pid file.
        # server: UDP statsd server; its start() blocks in run().
        # reporter: thread that periodically ships aggregated metrics.
        Daemon.__init__(self, pid_file, autorestart=autorestart)
        self.server = server
        self.reporter = reporter

    def _handle_sigterm(self, signum, frame):
        # Stopping the server unblocks run(), which then shuts down the reporter.
        log.debug("Caught sigterm. Stopping run loop.")
        self.server.stop()

    def run(self):
        """Run the server until it stops, then shut down the reporter.

        May call sys.exit() when autorestart is enabled.
        """
        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Start the reporting thread before accepting data
        self.reporter.start()

        try:
            try:
                self.server.start()
            except Exception, e:
                log.exception('Error starting server')
                raise e
        finally:
            # The server will block until it's done. Once we're here, shutdown
            # the reporting thread.
            self.reporter.stop()
            self.reporter.join()
            log.info("Monstatsd is stopped")
            # Restart if asked to restart
            if self.autorestart:
                sys.exit(AgentSupervisor.RESTART_EXIT_STATUS)

    def info(self):
        # Quiet the root logger so only the status report is printed.
        logging.getLogger().setLevel(logging.ERROR)
        return MonstatsdStatus.print_latest_status()
|
||||
|
||||
|
||||
def init_monstatsd(config_path=None, use_watchdog=False):
    """Configure the server and the reporting thread.
    """
    cfg = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration monstatsd")

    port = cfg['monstatsd_port']
    interval = int(cfg['monstatsd_interval'])
    aggregator_interval = int(cfg['monstatsd_agregator_bucket_size'])
    non_local_traffic = cfg['non_local_traffic']
    forward_to_host = cfg.get('statsd_forward_host')
    forward_to_port = cfg.get('statsd_forward_port')
    event_chunk_size = cfg.get('event_chunk_size')

    target = cfg['forwarder_url']
    hostname = get_hostname(cfg)

    # The aggregator is the shared state between the server thread (which
    # feeds it samples) and the reporter thread (which drains it).
    assert interval > 0
    aggregator = MetricsBucketAggregator(
        hostname, aggregator_interval,
        recent_point_threshold=cfg.get('recent_point_threshold', None))

    # Reporting thread (started by the caller).
    reporter = Reporter(interval, aggregator, target, use_watchdog, event_chunk_size)

    # IPv4 server: bind to loopback unless non_local_traffic asks for all
    # addresses.
    server_host = '' if non_local_traffic else 'localhost'
    server = Server(aggregator, server_host, port,
                    forward_to_host=forward_to_host,
                    forward_to_port=forward_to_port)

    return reporter, server, cfg
|
||||
|
||||
|
||||
def main(config_path=None):
    """ The main entry point for the unix version of monstatsd. """
    parser = optparse.OptionParser("%prog [start|stop|restart|status]")
    opts, args = parser.parse_args()

    reporter, server, cnf = init_monstatsd(config_path, use_watchdog=True)
    pid_file = PidFile('monstatsd')
    daemon = Monstatsd(pid_file.get_path(), server, reporter,
                       cnf.get('autorestart', False))

    # No arguments: run the server in the foreground.
    # todo does this need to be a daemon even when it basically always runs in the foreground, if not
    # restructure and get rid of the silly init_function
    if not args:
        daemon.run()
        return 0

    # Otherwise dispatch the daemon command.
    command = args[0]
    actions = {
        'start': daemon.start,
        'stop': daemon.stop,
        'restart': daemon.restart,
        'status': daemon.status,
    }
    if command in actions:
        actions[command]()
    elif command == 'info':
        return daemon.info()
    else:
        sys.stderr.write("Unknown command: %s\n\n" % command)
        parser.print_help()
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
|
@ -17,7 +17,7 @@ class Plugins(collections.defaultdict):
|
|||
# todo Possibly enforce the key being a string without .yaml in it.
|
||||
|
||||
def diff(self, other_plugins):
    """Compute the difference between this Plugins object and another.

    Not yet implemented; always raises NotImplementedError.
    """
    # Fixed: a duplicated, unreachable second `raise NotImplementedError`
    # (a leftover diff artifact) was removed.
    raise NotImplementedError
|
||||
|
||||
def merge(self, other):
|
||||
"""Do a deep merge with precedence going to other (as is the case with update)
|
||||
|
|
|
@ -1,99 +1,3 @@
|
|||
"""Classes and utilities for detection of running resources to be monitored.
|
||||
Detection classes should be platform independent
|
||||
"""
|
||||
import psutil
|
||||
from plugin import Plugin
|
||||
from utils import find_process_cmdline, find_process_name, watch_process, service_api_check
|
||||
|
||||
from monsetup import agent_config
|
||||
|
||||
|
||||
class Plugin(object):
    """Abstract class implemented by the mon-agent plugin detection classes
    """
    # todo these should include dependency detection

    def __init__(self, template_dir, overwrite=True):
        # Detection runs eagerly: subclasses set self.available in _detect().
        self.available = False
        self.template_dir = template_dir
        self.dependencies = ()
        self.overwrite = overwrite
        self._detect()

    def _detect(self):
        """Run detection, set self.available True if the service is detected."""
        raise NotImplementedError

    def build_config(self):
        """Build the config as a Plugins object and return.
        """
        raise NotImplementedError

    def dependencies_installed(self):
        """return True if dependencies are installed
        """
        raise NotImplementedError

    @property
    def name(self):
        """Return _name if set otherwise the class name"""
        # Only an instance-level _name counts as an override, mirroring the
        # original explicit __dict__ membership test.
        return self.__dict__.get('_name', self.__class__.__name__)
|
||||
|
||||
|
||||
def find_process_cmdline(search_string):
    """Return the first running process whose command line contains
    search_string, or None when nothing matches.
    """
    for proc in psutil.process_iter():
        if any(search_string in arg for arg in proc.cmdline()):
            return proc
    return None
|
||||
|
||||
|
||||
def find_process_name(pname):
    """Return the first running process whose name equals pname, else None.
    """
    for proc in psutil.process_iter():
        if proc.name() == pname:
            return proc
    return None
|
||||
|
||||
|
||||
def watch_process(search_strings, service=None):
    """Takes a list of process search strings and returns a Plugins object with the config set.
    This was built as a helper as many plugins setup process watching
    """
    parameters = {'name': search_strings[0],
                  'search_string': search_strings}

    # If service parameter is set in the plugin config, add the service dimension which
    # will override the service in the agent config
    if service:
        parameters['dimensions'] = {'service': service}

    config = agent_config.Plugins()
    config['process'] = {'init_config': None,
                         'instances': [parameters]}
    return config
|
||||
|
||||
def service_api_check(name, url, pattern, service=None):
    """Setup a service api to be watched by the http_check plugin."""
    parameters = {'name': name,
                  'url': url,
                  'match_pattern': pattern,
                  'timeout': 10,
                  'use_keystone': True}

    # If service parameter is set in the plugin config, add the service dimension which
    # will override the service in the agent config
    if service:
        parameters['dimensions'] = {'service': service}

    config = agent_config.Plugins()
    config['http_check'] = {'init_config': None,
                            'instances': [parameters]}
    return config
|
||||
|
|
|
@ -4,6 +4,7 @@ from monsetup import agent_config
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Cinder(Plugin):
|
||||
"""Detect cinder daemons and setup configuration to monitor them."""
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ from monsetup import agent_config
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CinderAPI(Plugin):
|
||||
"""Detect the Cinder-API daemon and setup configuration to monitor it."""
|
||||
|
||||
|
@ -21,11 +22,12 @@ class CinderAPI(Plugin):
|
|||
config = agent_config.Plugins()
|
||||
# First watch the Nova-API process
|
||||
log.info("\tMonitoring the cinder API process.")
|
||||
config.merge(watch_process([self.process_name], self.service_name))
|
||||
config.merge(watch_process([self.process_name], self.service_name))
|
||||
|
||||
# Next setup an active http_status check on the API
|
||||
log.info("\tConfiguring an http_check for the cinder API.")
|
||||
config.merge(service_api_check(self.process_name, 'http://localhost:8776/v2.0', '.*version=1.*', self.service_name))
|
||||
config.merge(
|
||||
service_api_check(self.process_name, 'http://localhost:8776/v2.0', '.*version=1.*', self.service_name))
|
||||
|
||||
return config
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ class Kafka(Plugin):
|
|||
|
||||
import kazoo
|
||||
from kazoo.client import KazooClient
|
||||
|
||||
logging.getLogger('kazoo').setLevel(logging.WARN) # kazoo fills up the console without this
|
||||
|
||||
zk = KazooClient(hosts='127.0.0.1:2181', read_only=True)
|
||||
|
@ -48,7 +49,6 @@ class Kafka(Plugin):
|
|||
except kazoo.exceptions.NoNodeError:
|
||||
continue
|
||||
|
||||
|
||||
log.info("\tInstalling kafka_consumer plugin.")
|
||||
config['kafka_consumer'] = {'init_config': None,
|
||||
'instances': [{'kafka_connect_str': 'localhost:9092',
|
||||
|
|
|
@ -4,6 +4,7 @@ from monsetup import agent_config
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Nova(Plugin):
|
||||
"""Detect Nova daemons and setup configuration to monitor them."""
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ from monsetup import agent_config
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NovaAPI(Plugin):
|
||||
"""Detect the Nova-API daemon and setup configuration to monitor it."""
|
||||
|
||||
|
@ -25,7 +26,8 @@ class NovaAPI(Plugin):
|
|||
|
||||
# Next setup an active http_status check on the API
|
||||
log.info("\tConfiguring an http_check for the nova API.")
|
||||
config.merge(service_api_check(self.process_name, 'http://localhost:8774/v2.0', '.*version=2.*', self.service_name))
|
||||
config.merge(
|
||||
service_api_check(self.process_name, 'http://localhost:8774/v2.0', '.*version=2.*', self.service_name))
|
||||
|
||||
return config
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
"""Classes for detection of running resources to be monitored.
|
||||
Detection classes should be platform independent
|
||||
"""
|
||||
|
||||
|
||||
class Plugin(object):
    """Abstract class implemented by the mon-agent plugin detection classes
    """
    # todo these should include dependency detection

    def __init__(self, template_dir, overwrite=True):
        # Detection runs eagerly: subclasses set self.available in _detect().
        self.available = False
        self.template_dir = template_dir
        self.dependencies = ()
        self.overwrite = overwrite
        self._detect()

    def _detect(self):
        """Run detection, set self.available True if the service is detected."""
        raise NotImplementedError

    def build_config(self):
        """Build the config as a Plugins object and return.
        """
        raise NotImplementedError

    def dependencies_installed(self):
        """return True if dependencies are installed
        """
        raise NotImplementedError

    @property
    def name(self):
        """Return _name if set otherwise the class name"""
        # Only an instance-level _name counts as an override, mirroring the
        # original explicit __dict__ membership test.
        return self.__dict__.get('_name', self.__class__.__name__)
|
||||
|
||||
|
|
@ -4,18 +4,19 @@ from monsetup import agent_config
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Swift(Plugin):
|
||||
"""Detect Swift daemons and setup configuration to monitor them."""
|
||||
|
||||
def _detect(self):
|
||||
"""Run detection"""
|
||||
self.swift_processes = ['swift-container-updater', 'swift-account-auditor',
|
||||
'swift-object-replicator', 'swift-container-replicator',
|
||||
'swift-object-auditor', 'swift-container-auditor',
|
||||
'swift-account-reaper', 'swift-container-sync',
|
||||
'swift-account-replicator', 'swift-object-updater',
|
||||
'swift-object-server', 'swift-account-server',
|
||||
'swift-container-server']
|
||||
'swift-object-replicator', 'swift-container-replicator',
|
||||
'swift-object-auditor', 'swift-container-auditor',
|
||||
'swift-account-reaper', 'swift-container-sync',
|
||||
'swift-account-replicator', 'swift-object-updater',
|
||||
'swift-object-server', 'swift-account-server',
|
||||
'swift-container-server']
|
||||
self.found_processes = []
|
||||
|
||||
for process in self.swift_processes:
|
||||
|
|
|
@ -4,6 +4,7 @@ from monsetup import agent_config
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SwiftAPI(Plugin):
|
||||
"""Detect the Swift-API daemon and setup configuration to monitor it."""
|
||||
|
||||
|
@ -25,7 +26,8 @@ class SwiftAPI(Plugin):
|
|||
|
||||
# Next setup an active http_status check on the API
|
||||
log.info("\tConfiguring an http_check for the swift API.")
|
||||
config.merge(service_api_check(self.process_name, 'http://localhost:8080/healthcheck', '.*OK.*', self.service_name))
|
||||
config.merge(
|
||||
service_api_check(self.process_name, 'http://localhost:8080/healthcheck', '.*OK.*', self.service_name))
|
||||
|
||||
return config
|
||||
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
""" Util functions to assist in detection.
|
||||
"""
|
||||
import psutil
|
||||
|
||||
from monsetup import agent_config
|
||||
|
||||
|
||||
def find_process_cmdline(search_string):
    """Return the first running process whose command line contains
    search_string, or None when nothing matches.
    """
    for proc in psutil.process_iter():
        if any(search_string in arg for arg in proc.cmdline()):
            return proc
    return None
|
||||
|
||||
|
||||
def find_process_name(pname):
    """Return the first running process whose name equals pname, else None.
    """
    for proc in psutil.process_iter():
        if proc.name() == pname:
            return proc
    return None
|
||||
|
||||
|
||||
def watch_process(search_strings, service=None):
    """Takes a list of process search strings and returns a Plugins object with the config set.
    This was built as a helper as many plugins setup process watching
    """
    parameters = {'name': search_strings[0],
                  'search_string': search_strings}

    # If service parameter is set in the plugin config, add the service dimension which
    # will override the service in the agent config
    if service:
        parameters['dimensions'] = {'service': service}

    config = agent_config.Plugins()
    config['process'] = {'init_config': None,
                         'instances': [parameters]}
    return config
|
||||
|
||||
|
||||
def service_api_check(name, url, pattern, service=None):
    """Setup a service api to be watched by the http_check plugin."""
    parameters = {'name': name,
                  'url': url,
                  'match_pattern': pattern,
                  'timeout': 10,
                  'use_keystone': True}

    # If service parameter is set in the plugin config, add the service dimension which
    # will override the service in the agent config
    if service:
        parameters['dimensions'] = {'service': service}

    config = agent_config.Plugins()
    config['http_check'] = {'init_config': None,
                            'instances': [parameters]}
    return config
|
|
@ -1,44 +1,2 @@
|
|||
"""Classes implementing different methods for running mon-agent on startup as well as starting the process immediately
|
||||
"""
|
||||
import psutil
|
||||
from service import Service
|
||||
|
||||
|
||||
class Service(object):
    """Abstract interface that concrete mon-agent service managers implement."""

    def __init__(self, config_dir, log_dir, name='mon-agent'):
        # Directories the agent reads configuration from and logs to,
        # plus the service name used with the init system.
        self.config_dir = config_dir
        self.log_dir = log_dir
        self.name = name

    def enable(self):
        """Configure mon-agent to start at boot.

        Generally requires running as super user.
        """
        raise NotImplementedError

    def start(self, restart=True):
        """Start mon-agent.

        If the agent is already running and restart is True, restart it.
        """
        raise NotImplementedError

    def stop(self):
        """Stop mon-agent."""
        raise NotImplementedError

    def is_enabled(self):
        """Return True when mon-agent is configured to start at boot."""
        raise NotImplementedError

    @staticmethod
    def is_running():
        """Return True when mon-agent is running, False otherwise."""
        # Only the supervisor process is checked, not the individual
        # agent components it manages.
        return any('/etc/mon-agent/supervisor.conf' in proc.cmdline()
                   for proc in psutil.process_iter())
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
"""Classes implementing different methods for running mon-agent on startup as well as starting the process immediately
|
||||
"""
|
||||
import psutil
|
||||
|
||||
|
||||
class Service(object):
    """Abstract base class implementing the interface for various service types.

    Concrete subclasses (one per init system) must implement enable,
    start, stop and is_enabled.
    """
    def __init__(self, config_dir, log_dir, name='mon-agent'):
        # config_dir: directory the agent reads its configuration from.
        # log_dir: directory the agent writes logs to.
        # name: service name registered with the init system.
        self.config_dir = config_dir
        self.log_dir = log_dir
        self.name = name

    def enable(self):
        """Sets mon-agent to start on boot.

        Generally this requires running as super user
        """
        raise NotImplementedError

    def start(self, restart=True):
        """Starts mon-agent

        If the agent is running and restart is True, restart
        """
        raise NotImplementedError

    def stop(self):
        """Stops mon-agent
        """
        raise NotImplementedError

    def is_enabled(self):
        """Returns True if mon-agent is setup to start on boot, false otherwise
        """
        raise NotImplementedError

    @staticmethod
    def is_running():
        """Returns True if mon-agent is running, false otherwise
        """
        # Looking for the supervisor process not the individual components
        for process in psutil.process_iter():
            if '/etc/mon-agent/supervisor.conf' in process.cmdline():
                return True

        return False
|
4
setup.py
4
setup.py
|
@ -131,9 +131,9 @@ setup(
|
|||
packages=find_packages(exclude=['tests', 'build*', 'packaging*']),
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'mon-forwarder = monagent.forwarder:main',
|
||||
'mon-forwarder = monagent.forwarder.daemon:main',
|
||||
'mon-collector = monagent.collector.daemon:main',
|
||||
'monstatsd = monagent.monstatsd:main',
|
||||
'monstatsd = monagent.monstatsd.daemon:main',
|
||||
'mon-setup = monsetup.main:main'
|
||||
],
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue