# (C) Copyright 2015 Hewlett Packard Enterprise Development Company LP

import numbers
import re
import time
import types  # noqa: kept for backward compatibility; no longer used by the type check

from monasca_agent.collector.checks import AgentCheck
from monasca_agent.common.util import get_hostname

# When running with pymongo < 2.0
# Not the full spec for mongo URIs -- just extract username and password
# http://www.mongodb.org/display/DOCS/connections6
# The group names must match the keys read from the parsed URI in
# MongoDb.check() ('username' and 'password').
mongo_uri_re = re.compile(
    r'mongodb://(?P<username>[^:@]+):(?P<password>[^:@]+)@.*')

DEFAULT_TIMEOUT = 10


class MongoDb(AgentCheck):
    """Agent check that reads MongoDB status documents and submits metrics.

    Metric names listed in GAUGES and RATES are dotted paths into the
    status dictionary assembled by check() (the 'serverStatus' result plus
    the 'stats' and 'replSet' sub-documents added by this check).
    """

    GAUGES = [
        "indexCounters.btree.missRatio",
        "globalLock.ratio",
        "connections.current",
        "connections.available",
        "mem.resident",
        "mem.virtual",
        "mem.mapped",
        "cursors.totalOpen",
        "cursors.timedOut",
        "uptime",

        "stats.indexes",
        "stats.indexSize",
        "stats.objects",
        "stats.dataSize",
        "stats.storageSize",

        "replSet.health",
        "replSet.state",
        "replSet.replicationLag",
        "metrics.repl.buffer.count",
        "metrics.repl.buffer.maxSizeBytes",
        "metrics.repl.buffer.sizeBytes",
    ]

    RATES = [
        "indexCounters.btree.accesses",
        "indexCounters.btree.hits",
        "indexCounters.btree.misses",
        "opcounters.insert",
        "opcounters.query",
        "opcounters.update",
        "opcounters.delete",
        "opcounters.getmore",
        "opcounters.command",
        "asserts.regular",
        "asserts.warning",
        "asserts.msg",
        "asserts.user",
        "asserts.rollovers",
        "metrics.document.deleted",
        "metrics.document.inserted",
        "metrics.document.returned",
        "metrics.document.updated",
        "metrics.getLastError.wtime.num",
        "metrics.getLastError.wtime.totalMillis",
        "metrics.getLastError.wtimeouts",
        "metrics.operation.fastmod",
        "metrics.operation.idhack",
        "metrics.operation.scanAndOrder",
        "metrics.queryExecutor.scanned",
        "metrics.record.moves",
        "metrics.repl.apply.batches.num",
        "metrics.repl.apply.batches.totalMillis",
        "metrics.repl.apply.ops",
        "metrics.repl.network.bytes",
        "metrics.repl.network.getmores.num",
        "metrics.repl.network.getmores.totalMillis",
        "metrics.repl.network.ops",
        "metrics.repl.network.readersCreated",
        "metrics.repl.oplog.insert.num",
        "metrics.repl.oplog.insert.totalMillis",
        "metrics.repl.oplog.insertBytes",
        "metrics.ttl.deletedDocuments",
        "metrics.ttl.passes",
    ]

    METRICS = GAUGES + RATES

    # Instance configuration keys forwarded to the pymongo connection when
    # they are present and not None.
    SSL_OPTIONS = (
        'ssl',
        'ssl_keyfile',
        'ssl_certfile',
        'ssl_cert_reqs',
        'ssl_ca_certs',
    )

    # Human readable names for the replica set member state codes used in
    # event messages.
    REPLSET_STATES = {
        0: 'Starting Up',
        1: 'Primary',
        2: 'Secondary',
        3: 'Recovering',
        4: 'Fatal',
        5: 'Starting up (forking threads)',
        6: 'Unknown',
        7: 'Arbiter',
        8: 'Down',
        9: 'Rollback',
        10: 'Removed',
    }

    def __init__(self, name, init_config, agent_config):
        AgentCheck.__init__(self, name, init_config, agent_config)
        # Last replica set state seen per server, used to emit an event only
        # when the state changes.
        self._last_state_by_server = {}

    @staticmethod
    def get_library_versions():
        """Return a dict mapping 'pymongo' to its version string.

        The value is "Not Found" when pymongo cannot be imported and
        "Unknown" when the module exposes no ``version`` attribute.
        """
        try:
            import pymongo
            version = pymongo.version
        except ImportError:
            version = "Not Found"
        except AttributeError:
            version = "Unknown"

        return {"pymongo": version}

    def check_last_state(self, state, server, agentConfig):
        """Emit an event if `state` differs from the last one seen for `server`.

        The new state is remembered before the event is created.
        """
        if self._last_state_by_server.get(server, -1) != state:
            self._last_state_by_server[server] = state
            return self.create_event(state, server, agentConfig)

    def create_event(self, state, server, agentConfig):
        """Create an event with a message describing the replication state

        of a mongo node
        """

        def get_state_description(state):
            # Unmapped codes fall back to 'Unknown' so the message never
            # reads "<server> is None".
            return self.REPLSET_STATES.get(state, 'Unknown')

        status = get_state_description(state)
        hostname = get_hostname(agentConfig)
        msg_title = "%s is %s" % (server, status)
        msg = "MongoDB %s just reported as %s" % (server, status)

        self.event({
            'timestamp': int(time.time()),
            'event_type': 'Mongo',
            'api_key': agentConfig['api_key'],
            'msg_title': msg_title,
            'msg_text': msg,
            'host': hostname
        })

    @staticmethod
    def _total_seconds(delta):
        """Return the length of a timedelta in seconds as a float."""
        # Python 2.7 has this built in, python < 2.7 don't...
        if hasattr(delta, 'total_seconds'):
            return delta.total_seconds()
        return (delta.microseconds +
                (delta.seconds + delta.days * 24 * 3600) * 10 ** 6) / 10.0 ** 6

    def _get_replset_data(self, db, server):
        """Return replica set data for the 'replSet' metrics, or None.

        Runs 'replSetGetStatus' on `db`. Returns None when the command
        yields nothing or fails with an OperationFailure mentioning
        replSetGetStatus; any other exception is re-raised.
        """
        # Handle replica data, if any
        # See
        # http://www.mongodb.org/display/DOCS/Replica+Set+Commands#ReplicaSetCommands-replSetGetStatus
        try:
            repl_status = db.command('replSetGetStatus')
            if not repl_status:
                return None

            primary = None
            current = None

            # find nodes: master and current node (ourself)
            for member in repl_status.get('members') or []:
                if member.get('self'):
                    current = member
                member_state = member.get('state')
                if member_state is not None and int(member_state) == 1:
                    primary = member

            data = {}

            # If we have both we can compute a lag time
            if current is not None and primary is not None:
                # NOTE(review): current - primary is zero or negative when
                # this node is behind the primary -- confirm the intended
                # sign before changing it; consumers may rely on it.
                lag = current['optimeDate'] - primary['optimeDate']
                data['replicationLag'] = self._total_seconds(lag)

            if current is not None:
                data['health'] = current['health']

            data['state'] = repl_status['myState']
            self.check_last_state(data['state'], server, self.agent_config)
            return data
        except Exception as e:
            # Matched by text so pymongo's exception classes need not be
            # imported here.
            if "OperationFailure" in repr(e) and "replSetGetStatus" in str(e):
                return None
            # Bare raise keeps the original traceback.
            raise

    def _submit_metrics(self, status, dimensions):
        """Submit every METRICS entry that can be found in `status`."""
        for metric in self.METRICS:
            # each metric is of the form: x.y.z with z optional
            # and can be found at status[x][y][z]
            value = status
            try:
                for part in metric.split("."):
                    value = value[part]
            except (KeyError, TypeError):
                # Path is missing, or runs through a non-dict value.
                continue

            # value is now status[x][y][z]
            # bool is an int subclass but is not a meaningful metric value.
            if isinstance(value, bool) or not isinstance(value, numbers.Real):
                self.log.warn(
                    "Mongo: skipping non-numeric value for metric %s" % metric)
                continue

            # Check if metric is a gauge or rate
            metric_name = self.normalize(metric.lower(), 'mongodb')
            if metric in self.GAUGES:
                self.gauge(metric_name, value, dimensions=dimensions)
            elif metric in self.RATES:
                self.rate(metric_name + "ps", value, dimensions=dimensions)

    def check(self, instance):
        """Collect MongoDB status for one configured instance and submit metrics.

        Builds a dictionary that looks a lot like what's sent back by
        db.serverStatus(), extended with 'stats' (dbstats) and, when
        available, 'replSet', then submits the entries listed in METRICS.
        Nothing is returned.

        :param instance: instance configuration; must contain 'server' (a
            mongodb URI) and may contain the keys listed in SSL_OPTIONS.
        :raises Exception: when pymongo cannot be imported.
        """
        if 'server' not in instance:
            self.log.warn("Missing 'server' in mongo config")
            return

        # NOTE(review): the server URI may embed credentials and is used
        # below both as a dimension value and in log messages -- confirm
        # that this is acceptable.
        server = instance['server']

        # Only forward the SSL options that were actually configured.
        ssl_params = dict(
            (option, instance[option])
            for option in self.SSL_OPTIONS
            if instance.get(option) is not None)

        dimensions = self._set_dimensions({'server': server}, instance)

        try:
            from pymongo import Connection
        except ImportError:
            self.log.error(
                'mongo.yaml exists but pymongo module can not be imported. Skipping check.')
            raise Exception(
                'Python PyMongo Module can not be imported. Please check the installation instruction on the Datadog Website')

        try:
            from pymongo import uri_parser
            # Configuration a URL, mongodb://user:pass@server/db
            parsed = uri_parser.parse_uri(server)
        except ImportError:
            # uri_parser is pymongo 2.0+
            matches = mongo_uri_re.match(server)
            parsed = matches.groupdict() if matches else {}

        username = parsed.get('username')
        password = parsed.get('password')
        db_name = parsed.get('database')

        if not db_name:
            self.log.info('No MongoDB database found in URI. Defaulting to admin.')
            db_name = 'admin'

        do_auth = username is not None and password is not None
        if not do_auth:
            self.log.debug(
                "Mongo: cannot extract username and password from config %s" % server)

        conn = Connection(server, network_timeout=DEFAULT_TIMEOUT, **ssl_params)
        db = conn[db_name]
        if do_auth:
            if not db.authenticate(username, password):
                self.log.error("Mongo: cannot connect with config %s" % server)

        status = db["$cmd"].find_one({"serverStatus": 1})
        status['stats'] = db.command('dbstats')

        replset_data = self._get_replset_data(db, server)
        if replset_data is not None:
            status['replSet'] = replset_data

        # If these keys exist, remove them for now as they cannot be serialized
        background_flushing = status.get('backgroundFlushing')
        if background_flushing:
            background_flushing.pop('last_finished', None)
        status.pop('localTime', None)

        # Go through the metrics and save the values
        self._submit_metrics(status, dimensions)