From 7acad5fdaaed903e2b715b79dbc93a7583df841f Mon Sep 17 00:00:00 2001 From: Martin Kalcok Date: Tue, 8 Dec 2020 15:51:32 +0100 Subject: [PATCH] NRPE: Allow excluding queues from queue-size checks Option '-e <vhost> <queue>' was added to the 'check_rabbitmq_queues.py' nrpe script to allow excluding selected queues when checking queue sizes. Corresponding option 'exclude_queues' was added to the charm config. By default, following queues are excluded: * event.sample * notifications_designate.info * notifications_designate.error * versioned_notifications.info * versioned_notifications.error Closes-Bug: #1811433 Change-Id: I57e297bb4323a3ab98da020bfcb1630889aac6d7 --- .zuul.yaml | 3 +-- config.yaml | 14 ++++++++++++++ files/check_rabbitmq_queues.py | 27 ++++++++++++++++++++++++--- hooks/rabbit_utils.py | 3 +++ unit_tests/test_rabbit_utils.py | 18 +++++++++++++++--- 5 files changed, 57 insertions(+), 8 deletions(-) diff --git a/.zuul.yaml b/.zuul.yaml index b3037e94..fd20909e 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -1,5 +1,4 @@ - project: templates: - - python35-charm-jobs - - openstack-python3-ussuri-jobs + - openstack-python3-charm-jobs - openstack-cover-jobs diff --git a/config.yaml b/config.yaml index bcbaf97e..f71d7c5e 100644 --- a/config.yaml +++ b/config.yaml @@ -106,6 +106,20 @@ options: Wildcards '*' are accepted to monitor all vhosts and/or queues. In case of multiple matches, only the first will apply: wildcards should therefore be used last in order to avoid unexpected behavior. + exclude_queues: + type: string + default: "[]" + description: | + List of RabbitMQ queues that should be skipped when checking thresholds. + Interpreted as YAML in format [<vhost>, <queue>] + Excluded queues can be expressed as a multi-line YAML array: + - ['/', 'queue1'] + - ['/', 'queue2'] + Or as a list of lists: + [['/', 'queue1'], ['/', 'queue2']] + Wildcards '*' are accepted to exclude, for example, a single queue on all + vhosts. Note that the wildcard asterisk must be double-escaped. 
Example: + [['\\*', 'queue1']] connection-backlog: type: int default: diff --git a/files/check_rabbitmq_queues.py b/files/check_rabbitmq_queues.py index 962ccc8e..25773553 100755 --- a/files/check_rabbitmq_queues.py +++ b/files/check_rabbitmq_queues.py @@ -38,12 +38,22 @@ def gen_stats(data_lines): yield vhost, queue, int(m_all) -def collate_stats(stats, limits): +def collate_stats(stats, limits, exclude): # Create a dict with stats collated according to the definitions in the # limits file. If none of the definitions in the limits file is matched, # store the stat without collating. collated = defaultdict(lambda: 0) for vhost, queue, m_all in stats: + skip = False + + for e_vhost, e_queue in exclude: + if fnmatchcase(vhost, e_vhost) and fnmatchcase(queue, e_queue): + skip = True + break + + if skip: + continue + for l_vhost, l_queue, _, _ in limits: if fnmatchcase(vhost, l_vhost) and fnmatchcase(queue, l_queue): collated[l_vhost, l_queue] += m_all @@ -120,7 +130,18 @@ if __name__ == "__main__": action='append', required=True, metavar=('vhost', 'queue', 'warn', 'crit'), - help=('Vhost and queue to check. Can be used multiple times')) + help='Vhost and queue to check. Can be used multiple times' + ) + parser.add_argument( + '-e', + nargs=2, + action='append', + required=False, + default=[], + metavar=('vhost', 'queue'), + help='Vhost and queue to exclude from checks. Can be used multiple \ + times' + ) parser.add_argument( 'stats_file', nargs='*', @@ -133,7 +154,7 @@ if __name__ == "__main__": chain.from_iterable( gen_data_lines(filename) for filename in args.stats_file)) # Collate stats according to limit definitions and check. 
- stats_collated = collate_stats(stats, args.c) + stats_collated = collate_stats(stats, args.c, args.e) stats_checked = check_stats(stats_collated, args.c) criticals, warnings = [], [] for queue, vhost, message_no, status in stats_checked: diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py index 3f485360..44ce1b8b 100644 --- a/hooks/rabbit_utils.py +++ b/hooks/rabbit_utils.py @@ -1403,6 +1403,9 @@ def nrpe_update_queues_check(nrpe_compat, rabbit_dir): # If value of queue_thresholds is incorrect we want the hook to fail for item in yaml.safe_load(config('queue_thresholds')): cmd += ' -c "{}" "{}" {} {}'.format(*item) + for item in yaml.safe_load(config('exclude_queues')): + cmd += ' -e "{}" "{}"'.format(*item) + nrpe_compat.add_check( shortname=RABBIT_USER + '_queue', description='Check RabbitMQ Queues', diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py index c572729c..c7cf2363 100644 --- a/unit_tests/test_rabbit_utils.py +++ b/unit_tests/test_rabbit_utils.py @@ -1157,13 +1157,25 @@ class UtilsTests(CharmTestCase): # call with stats_cron_schedule set to '*/5 * * * *' self.test_config.set('stats_cron_schedule', '*/5 * * * *') + # set some queues to exclude to test proper command generation + # with '-e' parameter + self.test_config.set('exclude_queues', + "[['\\*', 'event.sample'], " + "['\\*', 'notifications_designate.info']]") rabbit_utils.nrpe_update_queues_check(self.nrpe_compat, self.tmp_dir) + default_excludes = [ + ('\\*', 'event.sample'), + ('\\*', 'notifications_designate.info'), + ] + exclude_queues = '' + for vhost, queue in default_excludes: + exclude_queues += '-e "{}" "{}" '.format(vhost, queue) self.nrpe_compat.add_check.assert_called_with( shortname='rabbitmq_queue', description='Check RabbitMQ Queues', - check_cmd='{}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 ' - '{}/data/test_queue_stats.dat'.format(self.tmp_dir, - self.tmp_dir)) + check_cmd='{0}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 {1}' + 
'{0}/data/test_queue_stats.dat'.format(self.tmp_dir, + exclude_queues)) self.nrpe_compat.remove_check.assert_not_called() self.nrpe_compat.reset_mock()