
The repo is Python 3 now, so update hacking to version 3.0 which supports Python 3. Fix problems found. Update local hacking checks for new flake8. Change-Id: I6396403d0a62f5403fc5b7fb04b6ce790c332c84
353 lines
12 KiB
Python
353 lines
12 KiB
Python
#!/usr/bin/env python
|
|
# (C) Copyright 2015-2017 Hewlett Packard Enterprise Development LP
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# Core modules
|
|
import glob
|
|
import logging
|
|
import os
|
|
import pstats
|
|
import signal
|
|
import six
|
|
import sys
|
|
import time
|
|
|
|
# Custom modules
|
|
from monasca_agent.collector.checks.collector import Collector
|
|
import monasca_agent.collector.checks.services_checks as status_checks
|
|
from monasca_agent.collector import jmxfetch
|
|
import monasca_agent.common.config as cfg
|
|
import monasca_agent.common.daemon
|
|
import monasca_agent.common.emitter
|
|
import monasca_agent.common.util as util
|
|
|
|
# Initialise logging before any other module-level code runs.
util.initialize_logging('collector')
# Mask 0o22 clears the group/other write bits on files this process creates.
os.umask(0o22)

# Refuse to run on interpreters older than Python 2.4, because some modules
# (like subprocess) were only introduced in 2.4. The full (major, minor) tuple
# is compared: checking the minor number alone would also reject
# Python 3.0 - 3.3.
if sys.version_info < (2, 4):
    sys.stderr.write("Monasca Agent requires python 2.4 or later.\n")
    sys.exit(2)

# Constants
PID_NAME = "monasca-agent"
START_COMMANDS = ['start', 'restart', 'foreground']

# Globals
log = logging.getLogger('collector')
|
|
|
|
|
|
# TODO: the collector has daemon code but is always run in foreground mode
# from the supervisor; is there a reason for the daemon code then?
|
|
class CollectorDaemon(monasca_agent.common.daemon.Daemon):
    """Daemon that runs the collector main loop.

    The loop in :meth:`run` repeatedly invokes the collector, optionally
    profiles each iteration, and exits the process when asked to stop or
    when an auto restart is due.
    """

    def __init__(self, pidfile, autorestart, start_event=True):
        """Store the daemon settings.

        :param pidfile: path handed to the base ``Daemon`` constructor.
        :param autorestart: forwarded to the base ``Daemon`` constructor;
            ``run`` later reads ``self.autorestart`` (presumably set by the
            base class - confirm in monasca_agent.common.daemon).
        :param start_event: stored on the instance as ``start_event``.
        """
        monasca_agent.common.daemon.Daemon.__init__(self, pidfile, autorestart=autorestart)
        self.run_forever = True
        self.collector = None
        self.start_event = start_event

    def _handle_sigterm(self, signum, frame):
        """Signal handler: stop the collector and exit with status 0."""
        log.debug("Caught sigterm.")
        self._stop(0)
        sys.exit(0)

    def _handle_sigusr1(self, signum, frame):
        """Signal handler: stop the collector and exit with the restart status."""
        log.debug("Caught sigusr1.")
        self._stop(120)
        sys.exit(monasca_agent.common.daemon.AgentSupervisor.RESTART_EXIT_STATUS)

    def _stop(self, timeout=0):
        """Leave the run loop and stop JMXFetch and the collector.

        :param timeout: value passed through to ``Collector.stop``.
        """
        log.info("Stopping collector run loop.")
        self.run_forever = False

        if jmxfetch.JMXFetch.is_running():
            jmxfetch.JMXFetch.stop()

        if self.collector:
            self.collector.stop(timeout)

        log.info('collector stopped')

    @staticmethod
    def _start_profiler():
        """Return an enabled ``cProfile.Profile``, or None if it cannot be started."""
        try:
            import cProfile
            profiler = cProfile.Profile()
            profiler.enable()
            log.debug("Agent profiling is enabled")
            return profiler
        except Exception:
            log.warning("Cannot enable profiler")
            return None

    @staticmethod
    def _report_profile(profiler):
        """Disable ``profiler`` and write its cumulative stats to the debug log."""
        try:
            profiler.disable()
            stream = six.StringIO()
            stats = pstats.Stats(profiler, stream=stream).sort_stats("cumulative")
            stats.print_stats()
            log.debug(stream.getvalue())
        except Exception:
            log.warning("Cannot disable profiler")

    def run(self, config):
        """Main loop of the collector.

        Installs the signal handlers, builds the ``Collector`` and then runs
        it every ``config['check_freq']`` seconds until ``run_forever`` is
        cleared. This method does not return: it ends the process with
        ``os._exit``.

        :param config: mapping providing ``check_freq`` and, optionally,
            ``profile``; it is also passed to the ``Collector`` constructor.
        """
        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart.
        # Windows does not have this signal.
        if hasattr(signal, 'SIGUSR1'):
            signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Load the checks_d checks
        checksd = util.load_check_directory()

        self.collector = Collector(
            config, monasca_agent.common.emitter.http_emitter, checksd)

        check_frequency = int(config['check_freq'])

        # Initialize the auto-restarter
        self.restart_interval = int(util.get_collector_restart_interval())
        self.agent_start = time.time()

        exit_code = 0
        exit_timeout = 0

        # Run the main loop.
        while self.run_forever:
            collection_start = time.time()

            # Enable the profiler if requested.
            profiler = None
            if config.get('profile', False):
                profiler = self._start_profiler()

            # Do the work.
            self.collector.run(check_frequency)

            # Disable the profiler and log its stats.
            if profiler is not None:
                self._report_profile(profiler)

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self.run_forever = False
                exit_code = monasca_agent.common.daemon.AgentSupervisor.RESTART_EXIT_STATUS
                exit_timeout = 120
                log.info('Starting an auto restart')

            # Only plan for the next loop if we will continue,
            # otherwise just exit quickly.
            if self.run_forever:
                collection_time = time.time() - collection_start
                if collection_time < check_frequency:
                    time.sleep(check_frequency - collection_time)
                else:
                    log.info(
                        "Collection took {0} which is as long or longer then the configured "
                        "collection frequency of {1}. Starting collection again without waiting "
                        "in result.".format(collection_time, check_frequency))

        self._stop(exit_timeout)

        # Explicitly kill the process, because it might be running
        # as a daemon.
        log.info("Exiting collector daemon, code %d.", exit_code)
        os._exit(exit_code)

    def _should_restart(self):
        """Return True once more than ``restart_interval`` has elapsed since ``agent_start``."""
        return time.time() - self.agent_start > self.restart_interval
|
|
|
|
|
|
def _run_config_check(config):
    """Validate every ``*.yaml`` file in the conf.d directory.

    :param config: object whose ``check_yaml(path)`` raises on an invalid file.
    :returns: 0 when every file passed, 1 otherwise.
    """
    all_valid = True
    paths = util.Paths()
    for conf_path in glob.glob(os.path.join(paths.get_confd_path(), "*.yaml")):
        basename = os.path.basename(conf_path)
        try:
            config.check_yaml(conf_path)
        except Exception as e:
            all_valid = False
            print("%s contains errors:\n    %s" % (basename, e))
        else:
            print("%s is valid" % basename)

    if all_valid:
        print("All yaml files passed. You can now run the Monitoring agent.")
        return 0

    print("Fix the invalid yaml files above in order to start the Monitoring agent. "
          "A useful external tool for yaml parsing can be found at "
          "http://yaml-online-parser.appspot.com/")
    return 1


def _run_jmx_command(args, config):
    """Run the JMX helper sub-command in ``args[1]``, or print its usage.

    :param args: positional command-line arguments; ``args[0]`` is ``'jmx'``.
    :param config: configuration object passed through to ``JMXFetch.init``.
    """
    if len(args) < 2 or args[1] not in jmxfetch.JMX_LIST_COMMANDS:
        print("#" * 80)
        print("JMX tool to be used to help configure your JMX checks.")
        print("See http://docs.datadoghq.com/integrations/java/ for more information")
        print("#" * 80)
        print("\n")
        print("You have to specify one of the following commands:")
        for jmx_name, desc in jmxfetch.JMX_LIST_COMMANDS.items():
            print("      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (jmx_name, desc))
        print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes "
              "tomcat jmx solr")
        print("\n")
        return

    jmx_command = args[1]
    checks_list = args[2:]
    paths = util.Paths()
    confd_path = paths.get_confd_path()
    # Start JMXFetch if needed
    should_run = jmxfetch.JMXFetch.init(confd_path,
                                        config,
                                        15,
                                        jmx_command,
                                        checks_list,
                                        reporter="console")
    if not should_run:
        print(
            "Couldn't find any valid JMX configuration in your conf.d directory: %s" %
            confd_path)
        print("Have you enabled any JMX checks ?")


def main():
    """Parse the command line and dispatch the requested collector command.

    :returns: the process exit status: 2 for a usage error, 3 for an unknown
        command, the result of the config check for ``configcheck``,
        whatever ``agent.info`` returns for ``info``, and 0 otherwise.
    """
    options, args = util.get_parsed_args(prog='monasca-collector')
    config = cfg.Config()
    collector_config = config.get_config(['Main', 'Api', 'Logging'])
    autorestart = collector_config.get('autorestart', False)

    # NOTE(review): the validated value is not used further in this function;
    # CollectorDaemon.run() reads its interval from
    # util.get_collector_restart_interval(). Only the error report below has
    # an observable effect here.
    collector_restart_interval = collector_config.get(
        'collector_restart_interval', 24)
    if collector_restart_interval not in range(1, 49):
        log.error(
            "Collector_restart_interval = {0} is out of legal range"
            " [1, 48]. Reset collector_restart_interval to 24".format(collector_restart_interval))
        collector_restart_interval = 24

    COMMANDS = [
        'start',
        'stop',
        'restart',
        'foreground',
        'status',
        'info',
        'check',
        'check_all',
        'configcheck',
        'jmx',
    ]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = util.PidFile(PID_NAME)

    if options.clean:
        pid_file.clean()

    agent = CollectorDaemon(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % config.get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        log.info('Running in foreground')
        if autorestart:
            log.info('Running Agent with auto-restart ON')
        # Run in the standard foreground.
        agent.run(collector_config)

    elif 'check' == command:
        # A check name is mandatory; report a usage error instead of
        # failing with an IndexError on args[1].
        if len(args) < 2:
            sys.stderr.write("Usage: %s check <check_name>\n" % sys.argv[0])
            return 2
        check_name = args[1]
        checks = util.load_check_directory()
        found = False
        for check in checks['initialized_checks']:
            if check.name == check_name:
                found = True
                run_check(check)
        if not found:
            sys.stderr.write("No initialized check named '%s' was found\n" % check_name)

    elif 'check_all' == command:
        print("Loading check directory...")
        checks = util.load_check_directory()
        print("...directory loaded.\n")
        for check in checks['initialized_checks']:
            run_check(check)

    # NOTE: 'configtest' is not listed in COMMANDS, so it is rejected above
    # as an unknown command and never reaches this branch.
    elif 'configcheck' == command or 'configtest' == command:
        return _run_config_check(config)

    elif 'jmx' == command:
        _run_jmx_command(args, config)

    return 0
|
|
|
|
|
|
def run_check(check):
    """Run ``check`` twice and print the metrics it gathered.

    The check is executed two times, one second apart, so that rate metrics
    can be captured. For ``ServicesCheck`` instances the function waits one
    more second for asynchronous threads and then stops the check's pool.

    :param check: an initialized check exposing ``name``, ``run()`` and
        ``get_metrics()``.
    """
    uses_thread_pool = isinstance(check, status_checks.ServicesCheck)
    separator = "#" * 80

    print(separator)
    print("Check name: '{0}'\n".format(check.name))

    # First run, pause for a second, then run again to capture rate metrics.
    check.run()
    time.sleep(1)
    check.run()

    if uses_thread_pool:
        # Give the async threads a second to finish before stopping the pool.
        time.sleep(1)
        check.stop_pool()

    print("Metrics: ")
    check.get_metrics(prettyprint=True)
    print(separator + "\n\n")
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run main() and use its return value as the exit
    # status. Any uncaught error is logged on a best-effort basis and then
    # re-raised unchanged.
    try:
        exit_status = main()
    except Exception:
        try:
            log.exception("Uncaught error running the Agent")
        except Exception:  # nosec
            # Logging itself failed; nothing more can be done here.
            pass
        raise
    sys.exit(exit_status)
|