Add swift_recon check plugin to monasca
Swift outputs a lot of statsd metrics that you can point directly at monasca-agents. However there is another swift endpoint, recon, that is used to gather more metrics. The Swift recon (or reconnaissance) API is an endpoint each of the storage node servers make available via a REST API. This API can either be hit manually or via the swift-recon tool. This patch adds a check plugin that hits the recon REST API and sends metrics to monasca. This is the first of two Swift check plugins I'm working on. For more details see my blog post[1] [1] - https://oliver.net.au/?p=358 Change-Id: I503d74936f6f37fb261c1592845968319695475a
This commit is contained in:
parent
18b3098b15
commit
0d929d01a8
17
conf.d/swift_recon.yaml.example
Normal file
17
conf.d/swift_recon.yaml.example
Normal file
@ -0,0 +1,17 @@
|
||||
init_config:
|
||||
collect_period: 300
|
||||
|
||||
instances:
|
||||
- name: swift-account
|
||||
server_type: account
|
||||
hostname: localhost
|
||||
port: 6012
|
||||
timeout: 5
|
||||
- name: swift-container
|
||||
server_type: container
|
||||
hostname: localhost
|
||||
port: 6011
|
||||
- name: swift-object
|
||||
server_type: object
|
||||
hostname: localhost
|
||||
port: 6010
|
@ -86,6 +86,8 @@
|
||||
- [SQLServer](#sqlserver)
|
||||
- [Supervisord](#supervisord)
|
||||
- [Swift Diags](#swift-diags)
|
||||
- [Swift Recon](#swift-recon)
|
||||
- [Sample Config](#sample-config)
|
||||
- [TCP Check](#tcp-check)
|
||||
- [Varnish](#varnish)
|
||||
- [VCenter](#vcenter)
|
||||
@ -2649,6 +2651,73 @@ See [the example configuration](https://github.com/openstack/monasca-agent/blob/
|
||||
## Swift Diags
|
||||
See [the example configuration](https://github.com/openstack/monasca-agent/blob/master/conf.d/swift_diags.yaml.example) for how to configure the Swift Diags plugin.
|
||||
|
||||
## Swift Recon
|
||||
Swift outputs a lot of statsd metrics that you can point directly at
|
||||
monasca-agents. However there is another swift endpoint, recon, that is used
|
||||
to gather more metrics.
|
||||
|
||||
The Swift recon (or reconnaissance) API is an endpoint each of the storage
|
||||
node servers make available via a REST API. This API can either be hit
|
||||
manually, via the swift-recon tool, or via this plugin. This plugin queries
|
||||
the Swift Recon endpoint of the given node and reports these extra metrics
|
||||
back via Monasca.
|
||||
|
||||
There are many metrics available via swift recon; this plugin returns all
|
||||
date/timestamp and float/integer metrics. These include:
|
||||
|
||||
| Metric name | Description |
|
||||
|-------------|-------------|
|
||||
| swift_recon.object.async_pending | Number of container updates in async pending |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ALL.audit_time| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ALL.bytes_processed| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ALL.passes| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ALL.errors| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ALL.quarantined| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ALL.start_time| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ZBF.audit_time| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ZBF.bytes_processed| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ZBF.passes| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ZBF.errors| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ZBF.quarantined| |
|
||||
| swift_recon.object.auditor.object_auditor_stats_ZBF.start_time| |
|
||||
| swift_recon.object.object_updater_sweep | |
|
||||
| swift_recon.container.container_updater_sweep | |
|
||||
| swift_recon.object.expirer.object_expiration_pass | |
|
||||
| swift_recon.object.expirer.expired_last_pass | |
|
||||
| swift_recon.container.container_auditor_pass_completed | |
|
||||
| swift_recon.container.container_audits_failed | |
|
||||
| swift_recon.container.container_audits_passed | |
|
||||
| swift_recon.container.container_audits_since | |
|
||||
| swift_recon.account.account_auditor_pass_completed | |
|
||||
| swift_recon.account.account_audits_failed | |
|
||||
| swift_recon.account.account_audits_passed | |
|
||||
| swift_recon.account.account_audits_since | |
|
||||
| swift_recon.object.replication_time | |
|
||||
| swift_recon.object.replication_last | |
|
||||
| swift_recon.object.replication.attempted | |
|
||||
| swift_recon.object.replication.failure | |
|
||||
| swift_recon.object.replication.success | |
|
||||
| swift_recon.container.replication_time | |
|
||||
| swift_recon.container.replication_last | |
|
||||
| swift_recon.container.replication.attempted | |
|
||||
| swift_recon.container.replication.failure | |
|
||||
| swift_recon.container.replication.success | |
|
||||
| swift_recon.account.replication_time | |
|
||||
| swift_recon.account.replication_last | |
|
||||
| swift_recon.account.replication.attempted | |
|
||||
| swift_recon.account.replication.failure | |
|
||||
| swift_recon.account.replication.success | |
|
||||
| swift_recon.unmounted | |
|
||||
| swift_recon.disk_usage.mounted | |
|
||||
| swift_recon.disk_usage.size | |
|
||||
| swift_recon.disk_usage.used | |
|
||||
| swift_recon.disk_usage.avail | |
|
||||
| swift_recon.quarantined | with policy and ring names as dimensions to drill down |
|
||||
| swift_recon.drive_audit_errors | |
|
||||
|
||||
### Sample Config
|
||||
See [the example configuration](https://github.com/openstack/monasca-agent/blob/master/conf.d/swift_recon.yaml.example) for how to configure the Swift Recon plugin.
|
||||
|
||||
## TCP Check
|
||||
See [the example configuration](https://github.com/openstack/monasca-agent/blob/master/conf.d/tcp_check.yaml.example) for how to configure the TCP Check plugin.
|
||||
|
||||
|
292
monasca_agent/collector/checks_d/swift_recon.py
Normal file
292
monasca_agent/collector/checks_d/swift_recon.py
Normal file
@ -0,0 +1,292 @@
|
||||
import json
|
||||
import os
|
||||
import six
|
||||
from six.moves import urllib
|
||||
import socket
|
||||
|
||||
import monasca_agent.collector.checks as checks
|
||||
|
||||
|
||||
def to_grafana_date(item):
    """Convert an epoch value in seconds to the form grafana displays as a date.

    :param item: epoch seconds, as a number or a numeric string.
    :returns: float, the epoch value multiplied by 1000.
    """
    # grafana can handle epoch style dates, but only when the standard
    # epoch is multiplied by 1000.
    epoch_seconds = float(item)
    return epoch_seconds * 1000
|
||||
|
||||
|
||||
class SwiftReconException(Exception):
    """Raised when a swift recon endpoint could not be queried.

    :param msg: description of the failure, or the underlying exception.
    :param errcode: error code for the failure (callers in this module pass
                    the HTTP status code when one is available); -1 when
                    no code is known.
    """

    def __init__(self, msg, errcode=-1):
        # Hand the message to Exception so str(exc), exc.args and tracebacks
        # carry it instead of being empty.
        super(SwiftReconException, self).__init__(msg)
        self.message = msg
        self.errcode = errcode
|
||||
|
||||
|
||||
class SwiftRecon(checks.AgentCheck):
|
||||
|
||||
def scout_host(self, base_url, recon_type, timeout=5):
    """Perform the actual HTTP request to obtain swift recon telemetry.

    :param base_url: the base url of the host you wish to check. str of the
                     format 'http://127.0.0.1:6200/recon/'
    :param recon_type: the swift recon check to request.
    :param timeout: seconds to wait on the request before giving up.
    :returns: tuple of (recon url used, response body, and status)
    :raises SwiftReconException: when the request fails or times out, or
                                 when the body cannot be decoded as JSON.
    """
    url = base_url + recon_type
    try:
        response = urllib.request.urlopen(url, timeout=timeout)
        try:
            body = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        if six.PY3 and isinstance(body, six.binary_type):
            body = body.decode('utf8')
        content = json.loads(body)
        self.log.debug("-> %s: %s" % (url, content))
        # Any response that did not raise is reported as a plain 200.
        status = 200
    except urllib.error.HTTPError as err:
        self.log.error("-> %s: %s" % (url, err))
        raise SwiftReconException(err, err.code)
    except (urllib.error.URLError, socket.timeout) as err:
        self.log.error("-> %s: %s" % (url, err))
        raise SwiftReconException(err)
    except ValueError as err:
        # Undecodable bytes and malformed JSON both surface as ValueError;
        # report them like any other failed scout instead of letting them
        # escape to the caller.
        self.log.error("-> %s: %s" % (url, err))
        raise SwiftReconException(err)
    return url, content, status
|
||||
|
||||
def _build_base_url(self, instance):
|
||||
return "http://%(hostname)s:%(port)s/recon/" % instance
|
||||
|
||||
def _base_recon(self, instance, recon_type):
    """Query one recon endpoint and pair its content with metric dimensions.

    :param instance: instance configuration dict; 'hostname' and 'port' are
                     used for the url, 'timeout' is optional (default 5).
    :param recon_type: recon path to request, appended to the base url.
    :returns: tuple of (decoded content, copy of the dimensions dict), or
              (None, None) when the scout raised SwiftReconException.
    """
    base_url = self._build_base_url(instance)
    timeout = instance.get('timeout', 5)
    try:
        # only the decoded content is needed; url and status are discarded
        _url, content, _status = self.scout_host(
            base_url, recon_type, timeout)
        dimensions = self._set_dimensions({'service': 'swift'}, instance)
    except SwiftReconException as ex:
        message = 'Error running {0}: ({1}) {2}'.format(
            recon_type, ex.errcode, ex.message)
        self.log.error(message)
        return None, None
    return content, dimensions.copy()
|
||||
|
||||
def async_check(self, instance):
    """Gauge the number of container updates in async pending.

    Requests the 'async' recon endpoint and reports its 'async_pending'
    value as swift_recon.object.async_pending. Nothing is reported when the
    request failed or the value is absent or null.
    """
    content, dimensions = self._base_recon(instance, 'async')
    if not isinstance(content, dict):
        return None

    # .get() so a reply without the key is skipped rather than raising
    # KeyError out of the whole check run.
    pending = content.get('async_pending')
    if pending is None:
        return None

    self.gauge('swift_recon.object.async_pending', pending, dimensions)
|
||||
|
||||
def object_auditor_check(self, instance):
    """Gauge the object auditor statistics from 'auditor/object'.

    For each of the 'object_auditor_stats_ALL' and 'object_auditor_stats_ZBF'
    sections, reports audit_time, bytes_processed, passes, errors and
    quarantined as swift_recon.object.auditor.<section>.<stat>, plus
    start_time converted with to_grafana_date. Missing or null sections and
    values are skipped.
    """
    content, dimensions = self._base_recon(instance, 'auditor/object')
    if content is None:
        return None

    counters = ('audit_time', 'bytes_processed', 'passes', 'errors',
                'quarantined')
    for key in ('object_auditor_stats_ALL', 'object_auditor_stats_ZBF'):
        stats = content.get(key)
        if not stats:
            # section absent, null or empty: nothing to report
            continue
        prefix = 'swift_recon.object.auditor.{0}'.format(key)
        for item in counters:
            value = stats.get(item)
            if value is None:
                continue
            self.gauge('{0}.{1}'.format(prefix, item), value, dimensions)

        # Test the start_time value itself (not the enclosing section) so a
        # null timestamp is skipped instead of reaching float(None).
        start_time = stats.get('start_time')
        if start_time is not None:
            self.gauge('{0}.{1}'.format(prefix, 'start_time'),
                       to_grafana_date(start_time), dimensions)
|
||||
|
||||
def updater_check(self, instance, server_type='object'):
    """Gauge the updater sweep value for the given server type.

    Requests 'updater/<server_type>' and reports the
    '<server_type>_updater_sweep' value as
    swift_recon.<server_type>.<server_type>_updater_sweep. Nothing is
    reported when the request failed or the value is absent or null.
    """
    content, dimensions = self._base_recon(
        instance, 'updater/{0}'.format(server_type))
    stat = '{0}_updater_sweep'.format(server_type)
    if not isinstance(content, dict):
        return None

    # .get() so a reply without the key is skipped instead of raising
    # KeyError.
    sweep = content.get(stat)
    if sweep is None:
        return None

    self.gauge('swift_recon.{0}.{1}'.format(server_type, stat),
               sweep, dimensions)
|
||||
|
||||
def expirer_check(self, instance):
    """Gauge the object expirer values returned by 'expirer/object'.

    Reports 'object_expiration_pass' and 'expired_last_pass' under the
    swift_recon.object.expirer prefix, skipping any that are absent or null.
    """
    content, dimensions = self._base_recon(instance, 'expirer/object')
    if content is None:
        return None

    reported = ('object_expiration_pass', 'expired_last_pass')
    for name in reported:
        if name in content and content[name] is not None:
            metric = 'swift_recon.object.expirer.{0}'.format(name)
            self.gauge(metric, content[name], dimensions)
|
||||
|
||||
def auditor_check(self, instance, server_type='container'):
    """Gauge the auditor values returned by 'auditor/<server_type>'.

    Reports '<server_type>_auditor_pass_completed', '<server_type>_audits_failed'
    and '<server_type>_audits_passed' as they are, and
    '<server_type>_audits_since' converted with to_grafana_date. Absent or
    null values are skipped.
    """
    recon_path = 'auditor/{0}'.format(server_type)
    content, dimensions = self._base_recon(instance, recon_path)
    if content is None:
        return None

    def metric_for(stat_name):
        return 'swift_recon.{0}.{1}'.format(server_type, stat_name)

    for suffix in ('auditor_pass_completed', 'audits_failed',
                   'audits_passed'):
        stat = '{0}_{1}'.format(server_type, suffix)
        if stat in content and content[stat] is not None:
            self.gauge(metric_for(stat), content[stat], dimensions)

    # the "since" value is a timestamp, so it is sent in grafana date form
    since = '{0}_audits_since'.format(server_type)
    if since in content and content[since] is not None:
        self.gauge(metric_for(since), to_grafana_date(content[since]),
                   dimensions)
    return None
|
||||
|
||||
def replication_check(self, instance, server_type):
    """Gauge the replication values returned by 'replication/<server_type>'.

    Reports 'replication_time' as is and 'replication_last' converted with
    to_grafana_date under swift_recon.<server_type>, and the 'attempted',
    'failure' and 'success' entries of 'replication_stats' under
    swift_recon.<server_type>.replication. Absent or null values are
    skipped. Does nothing when server_type is empty.
    """
    if not server_type:
        return None

    content, dimensions = self._base_recon(
        instance, 'replication/{0}'.format(server_type))
    if content is None:
        return None

    for stat, is_date in (('replication_time', False),
                          ('replication_last', True)):
        value = content.get(stat)
        if value is None:
            continue
        if is_date:
            value = to_grafana_date(value)
        self.gauge('swift_recon.{0}.{1}'.format(server_type, stat),
                   value, dimensions)

    # A reply without a 'replication_stats' section (or with a null one)
    # simply has no counters to report; it must not raise KeyError.
    replication_stats = content.get('replication_stats') or {}
    for stat in ('attempted', 'failure', 'success'):
        value = replication_stats.get(stat)
        if value is None:
            continue
        self.gauge('swift_recon.{0}.replication.{1}'.format(server_type,
                                                            stat),
                   value, dimensions)
|
||||
|
||||
def umount_check(self, instance):
    """Gauge how many entries the 'unmounted' recon endpoint returned.

    The count is reported as swift_recon.unmounted; nothing is reported
    when the request failed.
    """
    unmounted, dimensions = self._base_recon(instance, 'unmounted')
    if unmounted is not None:
        count = len(unmounted)
        self.gauge('swift_recon.unmounted', count, dimensions)
    return None
|
||||
|
||||
def disk_usage(self, instance):
    """Gauge per-device values returned by the 'diskusage' recon endpoint.

    For every entry that names a 'device', reports 'mounted', 'size', 'used'
    and 'avail' as swift_recon.disk_usage.<stat> with the device added to the
    dimensions. Values that are absent, null, or strings that are not made
    of digits are skipped.
    """
    content, dimensions = self._base_recon(instance, 'diskusage')
    if content is None:
        return None

    for drive in content:
        device = drive.get('device')
        if not device:
            continue
        # Each drive gets its own dimensions dict rather than mutating one
        # shared dict between gauge calls.
        drive_dimensions = dict(dimensions, device=device)
        for stat in ('mounted', 'size', 'used', 'avail'):
            # .get() so an entry lacking a stat is skipped instead of
            # raising KeyError and dropping the remaining drives.
            value = drive.get(stat)
            if value is None:
                continue
            if isinstance(value, six.string_types) and \
                    not value.isdigit():
                continue
            self.gauge('swift_recon.disk_usage.{0}'.format(stat),
                       value, drive_dimensions)
|
||||
|
||||
def get_ringmd5(self, instance):
    """Gauge the md5 reported for each ring file by the 'ringmd5' endpoint.

    The metric name is swift_recon.md5.<ring>, where <ring> is the file's
    base name up to its first '.'. A null md5 is sent as an empty string.
    """
    content, dimensions = self._base_recon(instance, 'ringmd5')
    if content is None:
        return None

    for ring_path, checksum in content.items():
        # '/etc/swift/object.ring.gz' -> 'object'
        ring_name = os.path.basename(ring_path).split('.')[0]
        value = '' if checksum is None else checksum
        self.gauge('swift_recon.md5.{0}'.format(ring_name), value,
                   dimensions)
|
||||
|
||||
def get_swiftconfmd5(self, instance):
    """Gauge the md5 returned by the 'swiftconfmd5' recon endpoint.

    The first value of the reply is reported as swift_recon.md5.swift_conf;
    a null md5 is sent as an empty string. Nothing is reported when the
    request failed or the reply is empty.
    """
    content, dimensions = self._base_recon(instance, 'swiftconfmd5')
    if not content:
        return None

    # dict views cannot be indexed on Python 3, so take the first value
    # through an iterator instead of items()[0].
    md5 = next(iter(content.values()))
    if md5 is None:
        md5 = ''
    self.gauge('swift_recon.md5.swift_conf', md5, dimensions)
|
||||
|
||||
def quarantine_check(self, instance):
    """Gauge the quarantine counts returned by the 'quarantined' endpoint.

    Every count is reported as swift_recon.quarantined with a 'ring'
    dimension: 'account' and 'container' for the 'accounts' and 'containers'
    values, and for objects either one value per entry of 'policies'
    ('object' for policy 0, 'object-<id>' otherwise) or, when the reply has
    no 'policies', the top-level 'objects' value as 'object'. Null counts
    are skipped.
    """
    content, dimensions = self._base_recon(instance, 'quarantined')
    if content is None:
        return None

    def report(ring, count):
        if count is None:
            return
        # a fresh dict per metric, so no gauge call shares a mutated dict
        self.gauge('swift_recon.quarantined', count,
                   dict(dimensions, ring=ring))

    for stat in ('accounts', 'containers'):
        if stat in content:
            # 'accounts' -> 'account', 'containers' -> 'container'
            report(stat[:-1], content[stat])

    if 'policies' in content:
        policies = content['policies'] or {}
        for pol_id in policies:
            # Keys of a decoded JSON object are strings, so policy 0 shows
            # up as "0", which is truthy; compare its text form as well.
            if not pol_id or str(pol_id) == '0':
                ring = 'object'
            else:
                ring = 'object-{0}'.format(pol_id)
            report(ring, (policies[pol_id] or {}).get('objects'))
    elif 'objects' in content:
        report('object', content['objects'])
|
||||
|
||||
def driveaudit_check(self, instance):
    """Gauge the 'drive_audit_errors' value of the 'driveaudit' endpoint.

    The value is reported as swift_recon.drive_audit_errors. Nothing is
    reported when the request failed or the value is absent or null.
    """
    content, dimensions = self._base_recon(instance, 'driveaudit')
    if not isinstance(content, dict):
        return None

    # .get() so a reply without the key is skipped instead of raising
    # KeyError.
    errors = content.get('drive_audit_errors')
    if errors is None:
        return None

    self.gauge('swift_recon.drive_audit_errors', errors, dimensions)
|
||||
|
||||
def version_check(self, instance):
    """Gauge the 'version' value returned by the 'version' recon endpoint.

    The value is reported as swift_recon.swift_version. Nothing is reported
    when the request failed or the value is absent or null.
    """
    content, dimensions = self._base_recon(instance, 'version')
    if not isinstance(content, dict):
        return None

    # .get() so a reply without the key is skipped instead of raising
    # KeyError.
    version = content.get('version')
    if version is None:
        return None

    self.gauge('swift_recon.swift_version', version, dimensions)
|
||||
|
||||
def check(self, instance):
    """Run every recon check that applies to one configured instance.

    'hostname' and 'port' are required; without them an error is logged and
    nothing is checked. 'server_type' (account, container or object, in any
    letter case) selects the server specific checks; when it is missing or
    not one of those, a warning is logged and only the common checks run.

    :param instance: instance configuration dict.
    """
    server_type = instance.get('server_type', '')
    if not server_type:
        self.log.warning('Missing server_type, so will only attempt '
                         'common checks')
        server_type = ''
    if not instance.get('hostname'):
        self.log.error('Missing hostname')
        return None
    if not instance.get('port'):
        self.log.error('Missing port')
        return None

    # Normalise once so that validation and dispatch agree: 'Object' used to
    # pass validation yet match none of the branches below.
    server_type = server_type.lower()
    if server_type and server_type not in ('account', 'container',
                                           'object'):
        self.log.warning('server_type name needs to be either account, '
                         'container or object')
        # An unknown type is treated like a missing one, so no
        # replication/<unknown> request is attempted.
        server_type = ''

    if server_type == 'object':
        self.async_check(instance)
        self.object_auditor_check(instance)
        self.updater_check(instance, server_type)
        self.expirer_check(instance)
    elif server_type == 'container':
        self.auditor_check(instance, server_type)
        self.updater_check(instance, server_type)
    elif server_type == 'account':
        self.auditor_check(instance, server_type)

    if server_type:
        self.replication_check(instance, server_type)
    self.umount_check(instance)
    self.disk_usage(instance)

    # until we can find a way of sending something like an md5, we can't
    # run these
    # self.get_ringmd5(instance)
    # self.get_swiftconfmd5(instance)
    self.quarantine_check(instance)
    self.driveaudit_check(instance)

    # Same with the version string.
    # self.version_check(instance)
|
586
tests/checks_d/test_swift_recon.py
Normal file
586
tests/checks_d/test_swift_recon.py
Normal file
@ -0,0 +1,586 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import mock
|
||||
import unittest
|
||||
from collections import defaultdict
|
||||
|
||||
from monasca_agent.collector.checks_d import swift_recon
|
||||
|
||||
|
||||
class FakeLogger(object):
    """Logger stand-in that records error and warning messages in lists."""

    def __init__(self):
        # captured messages, keyed by level name
        self.log = dict(error=[], warning=[])

    def _write_msg(self, msg, key):
        bucket = self.log[key]
        bucket.append(msg)

    def error(self, msg):
        self._write_msg(msg, key='error')

    def warning(self, msg):
        self._write_msg(msg, key='warning')

    def get_loglines(self, key):
        """Return the list of messages captured for the given level."""
        return self.log[key]
|
||||
|
||||
|
||||
class MockSwiftRecon(swift_recon.SwiftRecon):
    """SwiftRecon test double with canned scout results and recorded gauges."""

    def __init__(self):
        super(MockSwiftRecon, self).__init__(
            name='swift_recon', init_config={}, instances=[],
            agent_config={})
        self.log = FakeLogger()
        # Results handed out by successive scout_host calls, in order; an
        # entry that is a SwiftReconException is raised instead of returned.
        self.scout_returns = []
        self.reset_gauge()

    def scout_host(self, base_url, recon_type, timeout=5):
        """Pop and return (or raise) the next canned scout result."""
        if not self.scout_returns:
            raise swift_recon.SwiftReconException("Mock error")
        outcome = self.scout_returns.pop(0)
        if isinstance(outcome, swift_recon.SwiftReconException):
            raise outcome
        return outcome

    def gauge(self, key, value, dimensions, *args, **kwargs):
        """Record the value per metric name and every dimension value seen."""
        self.gauge_called = True
        self.gauge_calls[key].append(value)
        for dim_name in dimensions:
            self.dimensions[dim_name].add(dimensions[dim_name])

    def reset_gauge(self):
        """Forget everything recorded by gauge()."""
        self.gauge_called = False
        self.gauge_calls = defaultdict(list)
        self.dimensions = defaultdict(set)
|
||||
|
||||
|
||||
class SwiftReconTest(unittest.TestCase):
|
||||
def setUp(self):
    """Give each test its own MockSwiftRecon instance."""
    super(SwiftReconTest, self).setUp()
    recon = MockSwiftRecon()
    self.swiftrecon = recon
|
||||
|
||||
def test_to_grafana_date(self):
    """to_grafana_date scales numbers and numeric strings by 1000."""
    samples = (0, 1, 5, 10, 10000, 9.999, "34984", '2303.230420')
    for sample in samples:
        expected = float(sample) * 1000
        self.assertEqual(expected, swift_recon.to_grafana_date(sample))
|
||||
|
||||
def test_build_base_url(self):
    """The base url is assembled from the instance hostname and port."""
    instance = dict(hostname='a.great.url', port=1234)
    built = self.swiftrecon._build_base_url(instance)
    self.assertEqual(built, "http://a.great.url:1234/recon/")
|
||||
|
||||
def test_base_recon(self):
    """_base_recon returns the content on success and (None, None) on error."""
    instance = {'hostname': 'a.great.url', 'port': 1234}
    canned = ("http://a.great.url:1234/recon/", {}, 200)
    self.swiftrecon.scout_returns = [canned]

    # A successful scout hands back the de-jsoned content together with a
    # dimensions dict.
    content, dim = self.swiftrecon._base_recon(instance, 'blah')
    self.assertDictEqual(content, {})

    # The canned results are now used up, so the mock raises and both
    # members of the result are None.
    content, dim = self.swiftrecon._base_recon(instance, 'blah')
    self.assertIsNone(content)
    self.assertIsNone(dim)
|
||||
|
||||
def _setup_speced_mock(self):
    """Return a MockSwiftRecon whose individual checks are mock.Mock objects."""
    mocked = MockSwiftRecon()
    mocked.log = FakeLogger()
    check_names = (
        'umount_check', 'disk_usage', 'quarantine_check',
        'driveaudit_check', 'async_check', 'object_auditor_check',
        'updater_check', 'expirer_check', 'auditor_check',
        'replication_check')
    for check_name in check_names:
        setattr(mocked, check_name, mock.Mock())
    return mocked
|
||||
|
||||
def test_check_missing_options(self):
    """check() logs missing options and limits the checks it runs."""
    # no hostname
    self.swiftrecon.check({'server_type': 'object', 'port': 1234})
    self.assertIn('Missing hostname',
                  self.swiftrecon.log.get_loglines('error'))

    # no port (start from a clean logger)
    self.swiftrecon.log = FakeLogger()
    self.swiftrecon.check(
        {'server_type': 'object', 'hostname': 'a.great.url'})
    self.assertIn('Missing port',
                  self.swiftrecon.log.get_loglines('error'))

    # no server_type
    mocked_swift = self._setup_speced_mock()
    mocked_swift.check({'hostname': 'a.great.url', 'port': 1234})
    self.assertIn('Missing server_type, so will only attempt '
                  'common checks',
                  mocked_swift.log.get_loglines('warning'))

    # only the checks that do not depend on server_type may have run
    expected_calls = {
        True: ('umount_check', 'disk_usage', 'quarantine_check',
               'driveaudit_check'),
        False: ('async_check', 'object_auditor_check', 'updater_check',
                'expirer_check', 'auditor_check', 'replication_check'),
    }
    for method in expected_calls[True]:
        self.assertTrue(getattr(mocked_swift, method).called)
    for method in expected_calls[False]:
        self.assertFalse(getattr(mocked_swift, method).called)
|
||||
|
||||
def test_checks_for_object_server_type(self):
    """An object instance runs the object and common checks only."""
    mocked_swift = self._setup_speced_mock()
    mocked_swift.check({'server_type': 'object',
                        'hostname': 'awesome.host',
                        'port': 1234})

    should_run = ('async_check', 'object_auditor_check', 'updater_check',
                  'expirer_check', 'replication_check', 'umount_check',
                  'disk_usage', 'quarantine_check', 'driveaudit_check')
    should_not_run = ('auditor_check', )

    for method in should_run:
        self.assertTrue(getattr(mocked_swift, method).called)
    for method in should_not_run:
        self.assertFalse(getattr(mocked_swift, method).called)
|
||||
|
||||
def test_checks_for_container_server_type(self):
    """A container instance runs the container and common checks only."""
    mocked_swift = self._setup_speced_mock()
    mocked_swift.check({'server_type': 'container',
                        'hostname': 'awesome.host',
                        'port': 1234})

    should_run = ('updater_check', 'auditor_check', 'replication_check',
                  'umount_check', 'disk_usage', 'quarantine_check',
                  'driveaudit_check')
    should_not_run = ('async_check', 'object_auditor_check',
                      'expirer_check')

    for method in should_run:
        self.assertTrue(getattr(mocked_swift, method).called)
    for method in should_not_run:
        self.assertFalse(getattr(mocked_swift, method).called)
|
||||
|
||||
def test_checks_for_account_server_type(self):
    """An account instance runs the account and common checks only."""
    mocked_swift = self._setup_speced_mock()
    mocked_swift.check({'server_type': 'account',
                        'hostname': 'awesome.host',
                        'port': 1234})

    should_run = ('auditor_check', 'replication_check',
                  'umount_check', 'disk_usage', 'quarantine_check',
                  'driveaudit_check')
    should_not_run = ('updater_check', 'async_check',
                      'object_auditor_check', 'expirer_check')

    for method in should_run:
        self.assertTrue(getattr(mocked_swift, method).called)
    for method in should_not_run:
        self.assertFalse(getattr(mocked_swift, method).called)
|
||||
|
||||
def _test_scout_error_no_gauge(self, func, *args):
    """Run func(*args) against a failing scout and assert no gauge was sent."""
    # The single canned result is a SwiftReconException, so the scout
    # raises and the gauge mock must stay untouched.
    failure = swift_recon.SwiftReconException('test')
    self.swiftrecon.reset_gauge()
    self.swiftrecon.scout_returns = [failure]
    func(*args)
    self.assertFalse(self.swiftrecon.gauge_called)
|
||||
|
||||
def test_umount_check(self):
    """umount_check gauges the number of unmounted entries."""
    instance = {'server_type': 'account', 'hostname': 'awesome.host',
                'port': 1234}

    # a failing scout must not send any gauge
    self._test_scout_error_no_gauge(self.swiftrecon.umount_check,
                                    instance)

    # successful scout with two unmounted devices
    url = self.swiftrecon._build_base_url(instance)
    unmounted = [{"device": "sdb1", "mounted": False},
                 {"device": "sdb5", "mounted": False}]
    self.swiftrecon.scout_returns = [(url, unmounted, 200)]

    self.swiftrecon.umount_check(instance)
    self.assertTrue(self.swiftrecon.gauge_called)
    self.assertIn('swift_recon.unmounted', self.swiftrecon.gauge_calls)
    reported = self.swiftrecon.gauge_calls['swift_recon.unmounted']
    self.assertEqual(reported[0], 2)
|
||||
|
||||
def test_disk_usage(self):
    """disk_usage tags metrics per device and skips empty string values."""
    instance = {'server_type': 'account', 'hostname': 'awesome.host',
                'port': 1234}

    # a failing scout must not send any gauge
    self._test_scout_error_no_gauge(self.swiftrecon.disk_usage,
                                    instance)

    # successful scout: one unmounted drive, one mounted drive
    url = self.swiftrecon._build_base_url(instance)
    unmounted_drive = {"device": "sdb1", "avail": "", "mounted": False,
                       "used": "", "size": ""}
    mounted_drive = {"device": "sdb5", "avail": "500", "mounted": True,
                     "used": "200", "size": "700"}
    self.swiftrecon.scout_returns = [
        (url, [unmounted_drive, mounted_drive], 200)]

    self.swiftrecon.disk_usage(instance)
    self.assertTrue(self.swiftrecon.gauge_called)

    for device in ('sdb1', 'sdb5'):
        self.assertIn(device, self.swiftrecon.dimensions['device'])

    # The empty strings of the unmounted drive are not sent, so 'mounted'
    # is the only stat reported more than once.
    expected_counts = (('mounted', 2), ('size', 1), ('used', 1),
                       ('avail', 1))
    for stat, count in expected_counts:
        metric = 'swift_recon.disk_usage.{0}'.format(stat)
        self.assertIn(metric, self.swiftrecon.gauge_calls)
        self.assertEqual(len(self.swiftrecon.gauge_calls[metric]), count)
|
||||
|
||||
def test_quarantine_check(self):
    """quarantine_check reports one value per ring, with or without policies."""
    instance = {'server_type': 'object', 'hostname': 'awesome.host',
                'port': 1234}
    metric = 'swift_recon.quarantined'

    # a failing scout must not send any gauge
    self._test_scout_error_no_gauge(self.swiftrecon.quarantine_check,
                                    instance)

    # two canned results: one with storage policies, one without
    url = self.swiftrecon._build_base_url(instance)
    with_policies = {
        "objects": 0, "accounts": 1, "containers": 2,
        "policies": {
            0: {"objects": 5},
            1: {"objects": 4}
        }}
    without_policies = {'objects': 1, 'containers': 2, 'accounts': 3}
    self.swiftrecon.scout_returns = [
        (url, with_policies, 200),
        (url, without_policies, 200)]

    # first the result from a Swift 2+ storage node (has storage
    # policies)
    self.swiftrecon.quarantine_check(instance)
    self.assertTrue(self.swiftrecon.gauge_called)

    self.assertIn(metric, self.swiftrecon.gauge_calls)
    values = self.swiftrecon.gauge_calls[metric]
    self.assertEqual(len(values), 4)
    self.assertListEqual(values, [1, 2, 5, 4])
    self.assertSetEqual(self.swiftrecon.dimensions['ring'],
                        {'account', 'container', 'object', 'object-1'})

    # then a pre-storage policy result (swift <2.0)
    self.swiftrecon.reset_gauge()
    self.swiftrecon.quarantine_check(instance)
    self.assertTrue(self.swiftrecon.gauge_called)

    self.assertIn(metric, self.swiftrecon.gauge_calls)
    values = self.swiftrecon.gauge_calls[metric]
    self.assertEqual(len(values), 3)
    self.assertListEqual(values, [3, 2, 1])
    self.assertSetEqual(self.swiftrecon.dimensions['ring'],
                        {'account', 'container', 'object'})
|
||||
|
||||
def test_driveaudit_check(self):
    """driveaudit_check gauges drive_audit_errors unless it is None."""
    instance = {'server_type': 'object', 'hostname': 'awesome.host',
                'port': 1234}
    metric = 'swift_recon.drive_audit_errors'

    # a failing scout must not send any gauge
    self._test_scout_error_no_gauge(self.swiftrecon.driveaudit_check,
                                    instance)

    # two canned results: a real count, then a null one
    url = self.swiftrecon._build_base_url(instance)
    self.swiftrecon.scout_returns = [
        (url, {"drive_audit_errors": 5}, 200),
        (url, {"drive_audit_errors": None}, 200)]

    self.swiftrecon.driveaudit_check(instance)
    self.assertTrue(self.swiftrecon.gauge_called)
    self.assertIn(metric, self.swiftrecon.gauge_calls)
    self.assertListEqual(self.swiftrecon.gauge_calls[metric], [5])

    # a None result must not reach the gauge
    self.swiftrecon.reset_gauge()
    self.swiftrecon.driveaudit_check(instance)
    self.assertFalse(self.swiftrecon.gauge_called)
|
||||
|
||||
def test_async_check(self):
    """async_check gauges async_pending unless it is None."""
    instance = {'server_type': 'object', 'hostname': 'awesome.host',
                'port': 1234}
    metric = 'swift_recon.object.async_pending'

    # a failing scout must not send any gauge
    self._test_scout_error_no_gauge(self.swiftrecon.async_check,
                                    instance)

    # two canned results: a real count, then a null one
    url = self.swiftrecon._build_base_url(instance)
    self.swiftrecon.scout_returns = [
        (url, {"async_pending": 12}, 200),
        (url, {"async_pending": None}, 200)]

    self.swiftrecon.async_check(instance)
    self.assertTrue(self.swiftrecon.gauge_called)
    self.assertIn(metric, self.swiftrecon.gauge_calls)
    self.assertListEqual(self.swiftrecon.gauge_calls[metric], [12])

    # a None result must not reach the gauge
    self.swiftrecon.reset_gauge()
    self.swiftrecon.async_check(instance)
    self.assertFalse(self.swiftrecon.gauge_called)
|
||||
|
||||
def test_object_auditor_check(self):
    """object_auditor_check gauges every ALL and ZBF auditor statistic."""
    instance = {'server_type': 'object', 'hostname': 'awesome.host',
                'port': 1234}

    # a failing scout must not send any gauge
    self._test_scout_error_no_gauge(self.swiftrecon.object_auditor_check,
                                    instance)

    # successful scout
    url = self.swiftrecon._build_base_url(instance)
    scout_content = {
        "object_auditor_stats_ALL": {
            "passes": 5,
            "errors": 1,
            "audit_time": 4,
            "start_time": 1531724606.053309,
            "quarantined": 2,
            "bytes_processed": 11885},
        "object_auditor_stats_ZBF": {
            "passes": 3,
            "errors": 0,
            "audit_time": 0,
            "start_time": 1531724665.303363,
            "quarantined": 0,
            "bytes_processed": 0}}
    self.swiftrecon.scout_returns = [(url, scout_content, 200)]

    self.swiftrecon.object_auditor_check(instance)
    self.assertTrue(self.swiftrecon.gauge_called)

    for frag in ('ALL', 'ZBF'):
        section = 'object_auditor_stats_{}'.format(frag)
        metric = 'swift_recon.object.auditor.{}'.format(section)
        for key, value in scout_content[section].items():
            name = '{}.{}'.format(metric, key)
            self.assertIn(name, self.swiftrecon.gauge_calls)
            # there is a hack to make epochs dates in grafana and that
            # is to multiply them by 1000
            expected = value * 1000 if key == 'start_time' else value
            self.assertListEqual(self.swiftrecon.gauge_calls[name],
                                 [expected])
|
||||
|
||||
def test_updater_check(self):
|
||||
instance = {'server_type': 'object', 'hostname': 'awesome.host',
|
||||
'port': 1234}
|
||||
|
||||
# test that the error case doesn't call gauge
|
||||
for server_type in ('object', 'container'):
|
||||
self._test_scout_error_no_gauge(self.swiftrecon.updater_check,
|
||||
instance, server_type)
|
||||
|
||||
# now check the correct case
|
||||
expected_url = self.swiftrecon._build_base_url(instance)
|
||||
|
||||
self.swiftrecon.scout_returns = [
|
||||
(expected_url, {"object_updater_sweep":
|
||||
0.10095596313476562}, 200),
|
||||
(expected_url, {"container_updater_sweep":
|
||||
0.11764812469482422}, 200)]
|
||||
|
||||
metric = 'swift_recon.{0}.{0}_updater_sweep'
|
||||
|
||||
self.swiftrecon.updater_check(instance, 'object')
|
||||
self.assertTrue(self.swiftrecon.gauge_called)
|
||||
self.assertIn(metric.format('object'), self.swiftrecon.gauge_calls)
|
||||
self.assertListEqual(
|
||||
self.swiftrecon.gauge_calls[metric.format('object')],
|
||||
[0.10095596313476562])
|
||||
self.swiftrecon.reset_gauge()
|
||||
|
||||
instance = {'server_type': 'container', 'hostname': 'awesome.host',
|
||||
'port': 1234}
|
||||
self.swiftrecon.updater_check(instance, 'container')
|
||||
self.assertTrue(self.swiftrecon.gauge_called)
|
||||
self.assertIn(metric.format('container'), self.swiftrecon.gauge_calls)
|
||||
self.assertListEqual(
|
||||
self.swiftrecon.gauge_calls[metric.format('container')],
|
||||
[0.11764812469482422])
|
||||
|
||||
def test_expirer_check(self):
|
||||
instance = {'server_type': 'object', 'hostname': 'awesome.host',
|
||||
'port': 1234}
|
||||
|
||||
# test that the error case doesn't call gauge
|
||||
for server_type in ('object', 'container'):
|
||||
self._test_scout_error_no_gauge(self.swiftrecon.updater_check,
|
||||
instance, server_type)
|
||||
|
||||
# now check the correct case
|
||||
expected_url = self.swiftrecon._build_base_url(instance)
|
||||
scout_content = {"object_expiration_pass": 0.021467924118041992,
|
||||
"expired_last_pass": 5}
|
||||
|
||||
self.swiftrecon.scout_returns = [
|
||||
(expected_url, scout_content, 200)]
|
||||
|
||||
metric = 'swift_recon.object.expirer.{}'
|
||||
|
||||
self.swiftrecon.expirer_check(instance)
|
||||
self.assertTrue(self.swiftrecon.gauge_called)
|
||||
|
||||
for stat in ('object_expiration_pass', 'expired_last_pass'):
|
||||
self.assertIn(metric.format(stat), self.swiftrecon.gauge_calls)
|
||||
|
||||
for key in scout_content:
|
||||
self.assertListEqual(
|
||||
self.swiftrecon.gauge_calls[metric.format(key)],
|
||||
[scout_content[key]])
|
||||
|
||||
def test_auditor_check(self):
|
||||
instance = {'server_type': 'container', 'hostname': 'awesome.host',
|
||||
'port': 1234}
|
||||
|
||||
# test that the error case doesn't call gauge
|
||||
for server_type in ('container', 'account'):
|
||||
self._test_scout_error_no_gauge(self.swiftrecon.auditor_check,
|
||||
instance, server_type)
|
||||
|
||||
# now check the correct case
|
||||
expected_url = self.swiftrecon._build_base_url(instance)
|
||||
scout_content_cont = {
|
||||
"container_audits_passed": 6,
|
||||
"container_auditor_pass_completed": 0.015977859497070312,
|
||||
"container_audits_since": 1531714368.710222,
|
||||
"container_audits_failed": 0}
|
||||
scout_content_acc = {
|
||||
"account_audits_passed": 2,
|
||||
"account_audits_failed": 1,
|
||||
"account_auditor_pass_completed": 7.200241088867188,
|
||||
"account_audits_since": 1531415933.143866}
|
||||
|
||||
self.swiftrecon.scout_returns = [
|
||||
(expected_url, scout_content_cont, 200),
|
||||
(expected_url, scout_content_acc, 200)]
|
||||
|
||||
for server_type, content in (('container', scout_content_cont),
|
||||
('account', scout_content_acc)):
|
||||
instance['server_type'] = server_type
|
||||
self.swiftrecon.auditor_check(instance, server_type)
|
||||
self.assertTrue(self.swiftrecon.gauge_called)
|
||||
|
||||
for key in content:
|
||||
metric = 'swift_recon.{0}.{1}'.format(server_type, key)
|
||||
self.assertIn(metric, self.swiftrecon.gauge_calls)
|
||||
if key.endswith('_audits_since'):
|
||||
# there is a hack to make epochs dates in grafana and that
|
||||
# is to time it by 1000
|
||||
content[key] = content[key] * 1000
|
||||
self.assertListEqual(self.swiftrecon.gauge_calls[metric],
|
||||
[content[key]])
|
||||
self.swiftrecon.reset_gauge()
|
||||
|
||||
def test_replication_check(self):
|
||||
instance = {'server_type': 'object', 'hostname': 'awesome.host',
|
||||
'port': 1234}
|
||||
|
||||
# test that the error case doesn't call gauge
|
||||
for server_type in ('object', 'container', 'account'):
|
||||
self._test_scout_error_no_gauge(self.swiftrecon.replication_check,
|
||||
instance, server_type)
|
||||
|
||||
# now check the correct case
|
||||
expected_url = self.swiftrecon._build_base_url(instance)
|
||||
scout_content_obj = {
|
||||
"replication_last": 1531724665.483373,
|
||||
"replication_stats": {
|
||||
"rsync": 0, "success": 1194, "attempted": 647,
|
||||
"remove": 0, "suffix_count": 1460, "failure": 1,
|
||||
"hashmatch": 1194, "suffix_hash": 0, "suffix_sync": 0},
|
||||
"object_replication_last": 1531724665.483373,
|
||||
"replication_time": 0.11197113196055095,
|
||||
"object_replication_time": 0.11197113196055095}
|
||||
scout_content_cont = {
|
||||
"replication_last": 1531724675.549912,
|
||||
"replication_stats": {
|
||||
"no_change": 8, "rsync": 0, "success": 8,
|
||||
"deferred": 0, "attempted": 4, "ts_repl": 0,
|
||||
"remove": 0, "remote_merge": 0, "diff_capped": 0,
|
||||
"failure": 0, "hashmatch": 0, "diff": 0,
|
||||
"start": 1531724675.488349, "empty": 0},
|
||||
"replication_time": 0.06156301498413086}
|
||||
scout_content_acc = {
|
||||
"replication_last": 1531724672.644893,
|
||||
"replication_stats": {
|
||||
"no_change": 0, "rsync": 0, "success": 10, "deferred": 0,
|
||||
"attempted": 7, "ts_repl": 0, "remove": 0, "remote_merge": 0,
|
||||
"diff_capped": 0, "failure": 5, "hashmatch": 0, "diff": 0,
|
||||
"start": 1531724672.639242, "empty": 0},
|
||||
"replication_time": 0.005650997161865234}
|
||||
|
||||
self.swiftrecon.scout_returns = [
|
||||
(expected_url, scout_content_obj, 200),
|
||||
(expected_url, scout_content_cont, 200),
|
||||
(expected_url, scout_content_acc, 200)]
|
||||
|
||||
for server_type, content in (('object', scout_content_obj),
|
||||
('container', scout_content_cont),
|
||||
('account', scout_content_acc)):
|
||||
instance['server_type'] = server_type
|
||||
self.swiftrecon.replication_check(instance, server_type)
|
||||
self.assertTrue(self.swiftrecon.gauge_called)
|
||||
|
||||
for key in ('replication_last', 'replication_time'):
|
||||
metric = 'swift_recon.{0}.{1}'.format(server_type, key)
|
||||
self.assertIn(metric, self.swiftrecon.gauge_calls)
|
||||
if key == 'replication_last':
|
||||
# there is a hack to make epochs dates in grafana and that
|
||||
# is to time it by 1000
|
||||
content[key] = content[key] * 1000
|
||||
self.assertListEqual(self.swiftrecon.gauge_calls[metric],
|
||||
[content[key]])
|
||||
|
||||
# Currently we are only grabbing the following 3 values. As you
|
||||
# can see there are much more. Adding more should be done at some
|
||||
# point.
|
||||
for key in ('attempted', 'failure', 'success'):
|
||||
metric = 'swift_recon.{0}.replication.{1}'.format(server_type,
|
||||
key)
|
||||
self.assertIn(metric, self.swiftrecon.gauge_calls)
|
||||
self.assertListEqual(self.swiftrecon.gauge_calls[metric],
|
||||
[content['replication_stats'][key]])
|
||||
self.swiftrecon.reset_gauge()
|
Loading…
x
Reference in New Issue
Block a user