NRPE: Allow excluding queues from queue-size checks
The option '-e <vhost> <queue>' was added to the 'check_rabbitmq_queues.py' NRPE script to allow excluding selected queues when checking queue sizes. A corresponding option, 'exclude_queues', was added to the charm config. By default, the following queues are excluded: * event.sample * notifications_designate.info * notifications_designate.error * versioned_notifications.info * versioned_notifications.error Closes-Bug: #1811433 Change-Id: I57e297bb4323a3ab98da020bfcb1630889aac6d7
This commit is contained in:
parent
07ec03b5d7
commit
7acad5fdaa
|
@ -1,5 +1,4 @@
|
||||||
- project:
|
- project:
|
||||||
templates:
|
templates:
|
||||||
- python35-charm-jobs
|
- openstack-python3-charm-jobs
|
||||||
- openstack-python3-ussuri-jobs
|
|
||||||
- openstack-cover-jobs
|
- openstack-cover-jobs
|
||||||
|
|
14
config.yaml
14
config.yaml
|
@ -106,6 +106,20 @@ options:
|
||||||
Wildcards '*' are accepted to monitor all vhosts and/or queues.
|
Wildcards '*' are accepted to monitor all vhosts and/or queues.
|
||||||
In case of multiple matches, only the first will apply: wildcards should
|
In case of multiple matches, only the first will apply: wildcards should
|
||||||
therefore be used last in order to avoid unexpected behavior.
|
therefore be used last in order to avoid unexpected behavior.
|
||||||
|
exclude_queues:
|
||||||
|
type: string
|
||||||
|
default: "[]"
|
||||||
|
description: |
|
||||||
|
List of RabbitMQ queues that should be skipped when checking thresholds.
|
||||||
|
Interpreted as a YAML list of [<vhost>, <queue>] pairs.
|
||||||
|
Excluded queues can be expressed as a multi-line YAML array:
|
||||||
|
- ['/', 'queue1']
|
||||||
|
- ['/', 'queue2']
|
||||||
|
Or as a list of lists:
|
||||||
|
[['/', 'queue1'], ['/', 'queue2']]
|
||||||
|
Wildcards '*' are accepted to exclude, for example, a single queue on all
|
||||||
|
vhosts. Note that the wildcard asterisk must be double-escaped. Example:
|
||||||
|
[['\\*', 'queue1']]
|
||||||
connection-backlog:
|
connection-backlog:
|
||||||
type: int
|
type: int
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -38,12 +38,22 @@ def gen_stats(data_lines):
|
||||||
yield vhost, queue, int(m_all)
|
yield vhost, queue, int(m_all)
|
||||||
|
|
||||||
|
|
||||||
def collate_stats(stats, limits):
|
def collate_stats(stats, limits, exclude):
|
||||||
# Create a dict with stats collated according to the definitions in the
|
# Create a dict with stats collated according to the definitions in the
|
||||||
# limits file. If none of the definitions in the limits file is matched,
|
# limits file. If none of the definitions in the limits file is matched,
|
||||||
# store the stat without collating.
|
# store the stat without collating.
|
||||||
collated = defaultdict(lambda: 0)
|
collated = defaultdict(lambda: 0)
|
||||||
for vhost, queue, m_all in stats:
|
for vhost, queue, m_all in stats:
|
||||||
|
skip = False
|
||||||
|
|
||||||
|
for e_vhost, e_queue in exclude:
|
||||||
|
if fnmatchcase(vhost, e_vhost) and fnmatchcase(queue, e_queue):
|
||||||
|
skip = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if skip:
|
||||||
|
continue
|
||||||
|
|
||||||
for l_vhost, l_queue, _, _ in limits:
|
for l_vhost, l_queue, _, _ in limits:
|
||||||
if fnmatchcase(vhost, l_vhost) and fnmatchcase(queue, l_queue):
|
if fnmatchcase(vhost, l_vhost) and fnmatchcase(queue, l_queue):
|
||||||
collated[l_vhost, l_queue] += m_all
|
collated[l_vhost, l_queue] += m_all
|
||||||
|
@ -120,7 +130,18 @@ if __name__ == "__main__":
|
||||||
action='append',
|
action='append',
|
||||||
required=True,
|
required=True,
|
||||||
metavar=('vhost', 'queue', 'warn', 'crit'),
|
metavar=('vhost', 'queue', 'warn', 'crit'),
|
||||||
help=('Vhost and queue to check. Can be used multiple times'))
|
help='Vhost and queue to check. Can be used multiple times'
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-e',
|
||||||
|
nargs=2,
|
||||||
|
action='append',
|
||||||
|
required=False,
|
||||||
|
default=[],
|
||||||
|
metavar=('vhost', 'queue'),
|
||||||
|
help='Vhost and queue to exclude from checks. Can be used multiple \
|
||||||
|
times'
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'stats_file',
|
'stats_file',
|
||||||
nargs='*',
|
nargs='*',
|
||||||
|
@ -133,7 +154,7 @@ if __name__ == "__main__":
|
||||||
chain.from_iterable(
|
chain.from_iterable(
|
||||||
gen_data_lines(filename) for filename in args.stats_file))
|
gen_data_lines(filename) for filename in args.stats_file))
|
||||||
# Collate stats according to limit definitions and check.
|
# Collate stats according to limit definitions and check.
|
||||||
stats_collated = collate_stats(stats, args.c)
|
stats_collated = collate_stats(stats, args.c, args.e)
|
||||||
stats_checked = check_stats(stats_collated, args.c)
|
stats_checked = check_stats(stats_collated, args.c)
|
||||||
criticals, warnings = [], []
|
criticals, warnings = [], []
|
||||||
for queue, vhost, message_no, status in stats_checked:
|
for queue, vhost, message_no, status in stats_checked:
|
||||||
|
|
|
@ -1403,6 +1403,9 @@ def nrpe_update_queues_check(nrpe_compat, rabbit_dir):
|
||||||
# If value of queue_thresholds is incorrect we want the hook to fail
|
# If value of queue_thresholds is incorrect we want the hook to fail
|
||||||
for item in yaml.safe_load(config('queue_thresholds')):
|
for item in yaml.safe_load(config('queue_thresholds')):
|
||||||
cmd += ' -c "{}" "{}" {} {}'.format(*item)
|
cmd += ' -c "{}" "{}" {} {}'.format(*item)
|
||||||
|
for item in yaml.safe_load(config('exclude_queues')):
|
||||||
|
cmd += ' -e "{}" "{}"'.format(*item)
|
||||||
|
|
||||||
nrpe_compat.add_check(
|
nrpe_compat.add_check(
|
||||||
shortname=RABBIT_USER + '_queue',
|
shortname=RABBIT_USER + '_queue',
|
||||||
description='Check RabbitMQ Queues',
|
description='Check RabbitMQ Queues',
|
||||||
|
|
|
@ -1157,13 +1157,25 @@ class UtilsTests(CharmTestCase):
|
||||||
|
|
||||||
# call with stats_cron_schedule set to '*/5 * * * *'
|
# call with stats_cron_schedule set to '*/5 * * * *'
|
||||||
self.test_config.set('stats_cron_schedule', '*/5 * * * *')
|
self.test_config.set('stats_cron_schedule', '*/5 * * * *')
|
||||||
|
# set some queues to exclude to test proper command generation
|
||||||
|
# with '-e' parameter
|
||||||
|
self.test_config.set('exclude_queues',
|
||||||
|
"[['\\*', 'event.sample'], "
|
||||||
|
"['\\*', 'notifications_designate.info']]")
|
||||||
rabbit_utils.nrpe_update_queues_check(self.nrpe_compat, self.tmp_dir)
|
rabbit_utils.nrpe_update_queues_check(self.nrpe_compat, self.tmp_dir)
|
||||||
|
default_excludes = [
|
||||||
|
('\\*', 'event.sample'),
|
||||||
|
('\\*', 'notifications_designate.info'),
|
||||||
|
]
|
||||||
|
exclude_queues = ''
|
||||||
|
for vhost, queue in default_excludes:
|
||||||
|
exclude_queues += '-e "{}" "{}" '.format(vhost, queue)
|
||||||
self.nrpe_compat.add_check.assert_called_with(
|
self.nrpe_compat.add_check.assert_called_with(
|
||||||
shortname='rabbitmq_queue',
|
shortname='rabbitmq_queue',
|
||||||
description='Check RabbitMQ Queues',
|
description='Check RabbitMQ Queues',
|
||||||
check_cmd='{}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 '
|
check_cmd='{0}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 {1}'
|
||||||
'{}/data/test_queue_stats.dat'.format(self.tmp_dir,
|
'{0}/data/test_queue_stats.dat'.format(self.tmp_dir,
|
||||||
self.tmp_dir))
|
exclude_queues))
|
||||||
self.nrpe_compat.remove_check.assert_not_called()
|
self.nrpe_compat.remove_check.assert_not_called()
|
||||||
|
|
||||||
self.nrpe_compat.reset_mock()
|
self.nrpe_compat.reset_mock()
|
||||||
|
|
Loading…
Reference in New Issue