From 7acad5fdaaed903e2b715b79dbc93a7583df841f Mon Sep 17 00:00:00 2001
From: Martin Kalcok <martin.kalcok@canonical.com>
Date: Tue, 8 Dec 2020 15:51:32 +0100
Subject: [PATCH] NRPE: Allow excluding queues from queue-size checks

The option '-e <vhost> <queue>' was added to the
'check_rabbitmq_queues.py' NRPE script to allow excluding selected queues
when checking queue sizes. A corresponding option, 'exclude_queues', was
added to the charm config.
By default, the following queues are excluded:
 * event.sample
 * notifications_designate.info
 * notifications_designate.error
 * versioned_notifications.info
 * versioned_notifications.error

Closes-Bug: #1811433
Change-Id: I57e297bb4323a3ab98da020bfcb1630889aac6d7
---
 .zuul.yaml                      |  3 +--
 config.yaml                     | 14 ++++++++++++++
 files/check_rabbitmq_queues.py  | 27 ++++++++++++++++++++++++---
 hooks/rabbit_utils.py           |  3 +++
 unit_tests/test_rabbit_utils.py | 18 +++++++++++++++---
 5 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/.zuul.yaml b/.zuul.yaml
index b3037e94..fd20909e 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -1,5 +1,4 @@
 - project:
     templates:
-      - python35-charm-jobs
-      - openstack-python3-ussuri-jobs
+      - openstack-python3-charm-jobs
       - openstack-cover-jobs
diff --git a/config.yaml b/config.yaml
index bcbaf97e..f71d7c5e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -106,6 +106,20 @@ options:
       Wildcards '*' are accepted to monitor all vhosts and/or queues.
       In case of multiple matches, only the first will apply: wildcards should
       therefore be used last in order to avoid unexpected behavior.
+  exclude_queues:
+    type: string
+    default: "[]"
+    description: |
+      List of RabbitMQ queues that should be skipped when checking thresholds.
+      Interpreted as YAML in format [<vhost>, <queue>]
+      Excluded queues can be expressed as a multi-line YAML array:
+      - ['/', 'queue1']
+      - ['/', 'queue2']
+      Or as a list of lists:
+      [['/', 'queue1'], ['/', 'queue2']]
+      Wildcards '*' are accepted to exclude, for example, a single queue on all
+      vhosts. Note that the wildcard asterisk must be double-escaped. Example:
+      [['\\*', 'queue1']]
   connection-backlog:
     type: int
     default:
diff --git a/files/check_rabbitmq_queues.py b/files/check_rabbitmq_queues.py
index 962ccc8e..25773553 100755
--- a/files/check_rabbitmq_queues.py
+++ b/files/check_rabbitmq_queues.py
@@ -38,12 +38,22 @@ def gen_stats(data_lines):
         yield vhost, queue, int(m_all)
 
 
-def collate_stats(stats, limits):
+def collate_stats(stats, limits, exclude):
     # Create a dict with stats collated according to the definitions in the
     # limits file. If none of the definitions in the limits file is matched,
     # store the stat without collating.
     collated = defaultdict(lambda: 0)
     for vhost, queue, m_all in stats:
+        skip = False
+
+        for e_vhost, e_queue in exclude:
+            if fnmatchcase(vhost, e_vhost) and fnmatchcase(queue, e_queue):
+                skip = True
+                break
+
+        if skip:
+            continue
+
         for l_vhost, l_queue, _, _ in limits:
             if fnmatchcase(vhost, l_vhost) and fnmatchcase(queue, l_queue):
                 collated[l_vhost, l_queue] += m_all
@@ -120,7 +130,18 @@ if __name__ == "__main__":
         action='append',
         required=True,
         metavar=('vhost', 'queue', 'warn', 'crit'),
-        help=('Vhost and queue to check. Can be used multiple times'))
+        help='Vhost and queue to check. Can be used multiple times'
+    )
+    parser.add_argument(
+        '-e',
+        nargs=2,
+        action='append',
+        required=False,
+        default=[],
+        metavar=('vhost', 'queue'),
+        help='Vhost and queue to exclude from checks. Can be used multiple \
+        times'
+    )
     parser.add_argument(
         'stats_file',
         nargs='*',
@@ -133,7 +154,7 @@ if __name__ == "__main__":
         chain.from_iterable(
             gen_data_lines(filename) for filename in args.stats_file))
     # Collate stats according to limit definitions and check.
-    stats_collated = collate_stats(stats, args.c)
+    stats_collated = collate_stats(stats, args.c, args.e)
     stats_checked = check_stats(stats_collated, args.c)
     criticals, warnings = [], []
     for queue, vhost, message_no, status in stats_checked:
diff --git a/hooks/rabbit_utils.py b/hooks/rabbit_utils.py
index 3f485360..44ce1b8b 100644
--- a/hooks/rabbit_utils.py
+++ b/hooks/rabbit_utils.py
@@ -1403,6 +1403,9 @@ def nrpe_update_queues_check(nrpe_compat, rabbit_dir):
         # If value of queue_thresholds is incorrect we want the hook to fail
         for item in yaml.safe_load(config('queue_thresholds')):
             cmd += ' -c "{}" "{}" {} {}'.format(*item)
+        for item in yaml.safe_load(config('exclude_queues')):
+            cmd += ' -e "{}" "{}"'.format(*item)
+
         nrpe_compat.add_check(
             shortname=RABBIT_USER + '_queue',
             description='Check RabbitMQ Queues',
diff --git a/unit_tests/test_rabbit_utils.py b/unit_tests/test_rabbit_utils.py
index c572729c..c7cf2363 100644
--- a/unit_tests/test_rabbit_utils.py
+++ b/unit_tests/test_rabbit_utils.py
@@ -1157,13 +1157,25 @@ class UtilsTests(CharmTestCase):
 
         # call with stats_cron_schedule set to '*/5 * * * *'
         self.test_config.set('stats_cron_schedule', '*/5 * * * *')
+        # set some queues to exclude to test proper command generation
+        # with '-e' parameter
+        self.test_config.set('exclude_queues',
+                             "[['\\*', 'event.sample'], "
+                             "['\\*', 'notifications_designate.info']]")
         rabbit_utils.nrpe_update_queues_check(self.nrpe_compat, self.tmp_dir)
+        default_excludes = [
+            ('\\*', 'event.sample'),
+            ('\\*', 'notifications_designate.info'),
+        ]
+        exclude_queues = ''
+        for vhost, queue in default_excludes:
+            exclude_queues += '-e "{}" "{}" '.format(vhost, queue)
         self.nrpe_compat.add_check.assert_called_with(
             shortname='rabbitmq_queue',
             description='Check RabbitMQ Queues',
-            check_cmd='{}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 '
-                      '{}/data/test_queue_stats.dat'.format(self.tmp_dir,
-                                                            self.tmp_dir))
+            check_cmd='{0}/check_rabbitmq_queues.py -c "\\*" "\\*" 100 200 {1}'
+                      '{0}/data/test_queue_stats.dat'.format(self.tmp_dir,
+                                                             exclude_queues))
         self.nrpe_compat.remove_check.assert_not_called()
 
         self.nrpe_compat.reset_mock()