c84e98d6b6
Replace the following items with Python 3 style code. - six.PY2 - six.add_metaclass - six.string_types - six.text_type - six.moves - six.StringIO - six.wraps - six.integer_types Story: 2008305 Task: 41191 Change-Id: I68710421b69d4049c9e990451da491dc14251fb5
292 lines
11 KiB
Python
292 lines
11 KiB
Python
import json
import os
import socket
import urllib
import urllib.error
import urllib.request

import monasca_agent.collector.checks as checks
|
|
|
|
|
|
def to_grafana_date(item):
    """Scale an epoch-style date so that Grafana can display it.

    Grafana can handle epoch-style dates, but a bit differently: they work
    when the standard epoch value is multiplied by 1000.

    :param item: the epoch value; anything accepted by ``float()``.
    :returns: the value as a float, multiplied by 1000.
    """
    epoch = float(item)
    return epoch * 1000
|
|
|
|
|
|
class SwiftReconException(Exception):
    """Raised when a swift recon request cannot be completed.

    :param msg: description of the failure (callers may also pass the
                underlying exception object); stored as ``message``.
    :param errcode: numeric error code, e.g. the HTTP status of a failed
                    request; ``-1`` when no code is available.
    """

    def __init__(self, msg, errcode=-1):
        # Initialise the base class with the message only, so that str(exc)
        # and tracebacks show the message instead of the repr of the
        # (msg, errcode) argument tuple.
        super().__init__(msg)
        self.message = msg
        self.errcode = errcode
|
|
|
|
|
|
class SwiftRecon(checks.AgentCheck):
    """Agent check that polls a Swift server's recon endpoint.

    Each ``*_check`` style method requests one recon resource (for example
    ``async`` or ``replication/object``) over HTTP and publishes the values
    it finds as ``swift_recon.*`` gauges. Values that are missing or null in
    the response are skipped rather than reported.
    """

    def scout_host(self, base_url, recon_type, timeout=5):
        """Perform the actual HTTP request to obtain swift recon telemetry.

        :param base_url: the base url of the host you wish to check. str of the
                         format 'http://127.0.0.1:6200/recon/'
        :param recon_type: the swift recon check to request.
        :param timeout: seconds to wait on the request before giving up.
        :returns: tuple of (recon url used, response body, and status)
        :raises SwiftReconException: if the request fails or times out, or
                                     if the body cannot be decoded as JSON.
        """
        url = base_url + recon_type
        try:
            # The context manager closes the response even if read() fails.
            with urllib.request.urlopen(url, timeout=timeout) as response:
                body = response.read()
            if isinstance(body, bytes):
                body = body.decode('utf8')
            content = json.loads(body)
        except urllib.error.HTTPError as err:
            # HTTPError is handled first because it carries a status code.
            self.log.error("-> %s: %s", url, err)
            raise SwiftReconException(err, err.code) from err
        except (urllib.error.URLError, socket.timeout, ValueError) as err:
            # ValueError covers undecodable bytes and malformed JSON, so a
            # bad response is reported like any other failed request
            # instead of escaping as an unexpected exception type.
            self.log.error("-> %s: %s", url, err)
            raise SwiftReconException(err) from err

        self.log.debug("-> %s: %s", url, content)
        status = 200
        return url, content, status

    def _build_base_url(self, instance):
        """Build the recon base url from the instance hostname and port."""
        return "http://%(hostname)s:%(port)s/recon/" % instance

    def _base_recon(self, instance, recon_type):
        """Fetch one recon resource and build the dimensions to report with.

        :param instance: the instance configuration mapping.
        :param recon_type: the swift recon check to request.
        :returns: tuple of (decoded response, dimensions dict), or
                  ``(None, None)`` when the request failed.
        """
        try:
            _url, content, _status = self.scout_host(
                self._build_base_url(instance), recon_type,
                instance.get('timeout', 5))
        except SwiftReconException as ex:
            self.log.error('Error running %s: (%s) %s',
                           recon_type, ex.errcode, ex.message)
            return None, None

        dimensions = self._set_dimensions({'service': 'swift'}, instance)
        return content, dimensions.copy()

    def async_check(self, instance):
        """Report the number of pending async updates."""
        content, dimensions = self._base_recon(instance, 'async')
        if content is None:
            return None

        pending = content.get('async_pending')
        if pending is None:
            return None

        self.gauge('swift_recon.object.async_pending', pending, dimensions)

    def object_auditor_check(self, instance):
        """Report the object auditor statistics (ALL and ZBF groups)."""
        content, dimensions = self._base_recon(instance, 'auditor/object')
        if content is None:
            return None

        for key in ('object_auditor_stats_ALL', 'object_auditor_stats_ZBF'):
            stats = content.get(key)
            if not stats:
                # Group missing, null or empty: nothing to report.
                continue

            for item in ('audit_time', 'bytes_processed', 'passes', 'errors',
                         'quarantined'):
                value = stats.get(item)
                if value is None:
                    continue
                self.gauge(
                    'swift_recon.object.auditor.{0}.{1}'.format(key, item),
                    value, dimensions)

            # start_time is a date, so it is converted before reporting.
            start_time = stats.get('start_time')
            if start_time is not None:
                self.gauge(
                    'swift_recon.object.auditor.{0}.{1}'.format(
                        key, 'start_time'),
                    to_grafana_date(start_time), dimensions)

    def updater_check(self, instance, server_type='object'):
        """Report the updater sweep value for the given server type."""
        content, dimensions = self._base_recon(
            instance, 'updater/{0}'.format(server_type))
        if content is None:
            return None

        stat = '{0}_updater_sweep'.format(server_type)
        sweep = content.get(stat)
        if sweep is None:
            return None

        self.gauge('swift_recon.{0}.{1}'.format(server_type, stat),
                   sweep, dimensions)

    def expirer_check(self, instance):
        """Report the object expirer statistics."""
        content, dimensions = self._base_recon(instance, 'expirer/object')
        if content is None:
            return None

        for stat in ('object_expiration_pass', 'expired_last_pass'):
            value = content.get(stat)
            if value is None:
                continue
            self.gauge(
                'swift_recon.object.expirer.{0}'.format(stat),
                value, dimensions)

    def auditor_check(self, instance, server_type='container'):
        """Report the auditor statistics for the given server type."""
        content, dimensions = self._base_recon(
            instance, 'auditor/{0}'.format(server_type))
        if content is None:
            return None

        for stat in ('{0}_auditor_pass_completed'.format(server_type),
                     '{0}_audits_failed'.format(server_type),
                     '{0}_audits_passed'.format(server_type)):
            value = content.get(stat)
            if value is None:
                continue
            self.gauge('swift_recon.{0}.{1}'.format(server_type, stat),
                       value, dimensions)

        # audits_since is a date, so it is converted before reporting.
        stat = '{0}_audits_since'.format(server_type)
        since = content.get(stat)
        if since is None:
            return None
        self.gauge('swift_recon.{0}.{1}'.format(server_type, stat),
                   to_grafana_date(since), dimensions)

    def replication_check(self, instance, server_type):
        """Report replication timings and counters for the server type."""
        if not server_type:
            return None

        content, dimensions = self._base_recon(
            instance, 'replication/{0}'.format(server_type))
        if content is None:
            return None

        for stat, is_date in (('replication_time', False),
                              ('replication_last', True)):
            value = content.get(stat)
            if value is None:
                continue
            if is_date:
                value = to_grafana_date(value)
            self.gauge('swift_recon.{0}.{1}'.format(server_type, stat),
                       value, dimensions)

        # A response without 'replication_stats' simply has no counters.
        replication_stats = content.get('replication_stats') or {}
        for stat in ('attempted', 'failure', 'success'):
            value = replication_stats.get(stat)
            if value is None:
                continue
            self.gauge(
                'swift_recon.{0}.replication.{1}'.format(server_type, stat),
                value, dimensions)

    def umount_check(self, instance):
        """Report how many entries the 'unmounted' resource returned."""
        content, dimensions = self._base_recon(instance, 'unmounted')
        if content is None:
            return None

        self.gauge('swift_recon.unmounted', len(content), dimensions)

    def disk_usage(self, instance):
        """Report mounted/size/used/avail for every drive with a device."""
        content, dimensions = self._base_recon(instance, 'diskusage')
        if content is None:
            return None

        for drive in content:
            if not drive.get('device'):
                continue
            # Each drive gets its own dimensions dict, so a gauge never
            # shares a mutable dict with the gauges of another drive.
            drive_dimensions = dict(dimensions, device=drive['device'])
            for stat in ('mounted', 'size', 'used', 'avail'):
                value = drive.get(stat)
                if value is None:
                    continue
                # Non-numeric strings cannot be reported as a gauge value.
                if isinstance(value, str) and not value.isdigit():
                    continue
                self.gauge('swift_recon.disk_usage.{0}'.format(stat),
                           value, drive_dimensions)

    def get_ringmd5(self, instance):
        """Report the md5 of every ring file, keyed by the ring name."""
        content, dimensions = self._base_recon(instance, 'ringmd5')
        if content is None:
            return None

        for ring_file, md5 in content.items():
            # Reduce a path such as '/x/object.ring.gz' to 'object'.
            ring_name = os.path.basename(ring_file).split('.')[0]
            if md5 is None:
                md5 = ''
            self.gauge(
                'swift_recon.md5.{0}'.format(ring_name), md5, dimensions)

    def get_swiftconfmd5(self, instance):
        """Report the md5 returned by the 'swiftconfmd5' resource."""
        content, dimensions = self._base_recon(instance, 'swiftconfmd5')
        if not content:
            # Request failed or the response holds no entry at all.
            return None

        # Dict views cannot be indexed on Python 3; take the first value.
        md5 = next(iter(content.values()))
        if md5 is None:
            md5 = ''
        self.gauge('swift_recon.md5.swift_conf', md5, dimensions)

    def quarantine_check(self, instance):
        """Report quarantined counts, one gauge per ring."""
        content, dimensions = self._base_recon(instance, 'quarantined')
        if content is None:
            return None

        for stat in ('accounts', 'containers'):
            if stat not in content:
                continue
            # Drop the plural 's' to get the ring name, and give each gauge
            # its own dimensions dict instead of mutating a shared one.
            self.gauge('swift_recon.quarantined', content[stat],
                       dict(dimensions, ring=stat[:-1]))

        if 'policies' in content:
            for pol_id, pol_stats in content['policies'].items():
                # NOTE(review): a JSON key such as '0' is a non-empty string,
                # so it maps to 'object-0' here, not 'object' - confirm this
                # is the intended ring name for policy 0.
                ring = 'object' if not pol_id else 'object-{0}'.format(pol_id)
                objects = pol_stats.get('objects')
                if objects is None:
                    continue
                self.gauge('swift_recon.quarantined', objects,
                           dict(dimensions, ring=ring))
        elif 'objects' in content:
            self.gauge('swift_recon.quarantined', content['objects'],
                       dict(dimensions, ring='object'))

    def driveaudit_check(self, instance):
        """Report the drive audit error count."""
        content, dimensions = self._base_recon(instance, 'driveaudit')
        if content is None:
            return None

        errors = content.get('drive_audit_errors')
        if errors is None:
            return None

        self.gauge('swift_recon.drive_audit_errors', errors, dimensions)

    def version_check(self, instance):
        """Report the version value returned by the 'version' resource."""
        content, dimensions = self._base_recon(instance, 'version')
        if content is None:
            return None

        version = content.get('version')
        if version is None:
            return None

        self.gauge('swift_recon.swift_version', version, dimensions)

    def check(self, instance):
        """Run every recon check that applies to the configured instance.

        :param instance: the instance configuration mapping. 'hostname' and
                         'port' are required; 'server_type' (account,
                         container or object, in any letter case) selects
                         the server specific checks; 'timeout' is optional.
        """
        # Normalise the case once so that validation and dispatch agree.
        server_type = (instance.get('server_type') or '').lower()
        if not server_type:
            self.log.warning('Missing server_type, so will only attempt '
                             'common checks')
        if not instance.get('hostname'):
            self.log.error('Missing hostname')
            return None
        if not instance.get('port'):
            self.log.error('Missing port')
            return None
        if server_type and server_type not in ('account', 'container',
                                               'object'):
            self.log.warning('server_type name needs to be either account, '
                             'container or object')
            # An unknown type has no recon resources of its own to query,
            # so fall back to the common checks only.
            server_type = ''

        if server_type == 'object':
            self.async_check(instance)
            self.object_auditor_check(instance)
            self.updater_check(instance, server_type)
            self.expirer_check(instance)
        elif server_type == 'container':
            self.auditor_check(instance, server_type)
            self.updater_check(instance, server_type)
        elif server_type == 'account':
            self.auditor_check(instance, server_type)

        if server_type:
            self.replication_check(instance, server_type)
        self.umount_check(instance)
        self.disk_usage(instance)

        # The md5 checks stay disabled until there is a way of sending
        # something like an md5.
        # self.get_ringmd5(instance)
        # self.get_swiftconfmd5(instance)
        self.quarantine_check(instance)
        self.driveaudit_check(instance)

        # Same with the version string.
        # self.version_check(instance)
|