From e3502b2c9c5f65ee8c0f3ed1aff8ef66c94de2f1 Mon Sep 17 00:00:00 2001
From: Edward Hope-Morley
Date: Fri, 30 Jun 2023 17:27:22 +0100
Subject: [PATCH] Add ovn cert nrpe check

Certs are root readable so we use a cron job to perform
the check and save state for an nrpe check to read and
send back to nagios.

Closes-Bug: #1979539
Change-Id: I7c7cd238ddf3fd9f92bfa5879d19d78c091cf2ac
---
 pylintrc                                  | 56 +++++++++++++
 src/files/nagios/nrpe_check_ovn_certs.py  | 44 +++++++++++
 src/files/scripts/check_ovn_certs.py      | 95 +++++++++++++++++++++++
 src/lib/charm/openstack/ovn_central.py    | 28 +++++++
 tox.ini                                   | 11 ++-
 unit_tests/test_lib_charms_ovn_central.py | 33 ++++----
 6 files changed, 252 insertions(+), 15 deletions(-)
 create mode 100644 pylintrc
 create mode 100755 src/files/nagios/nrpe_check_ovn_certs.py
 create mode 100755 src/files/scripts/check_ovn_certs.py

diff --git a/pylintrc b/pylintrc
new file mode 100644
index 0000000..8419a10
--- /dev/null
+++ b/pylintrc
@@ -0,0 +1,56 @@
+[MAIN]
+jobs=0
+ignore=.git
+
+# List of plugins (as comma separated values of python module names) to load,
+# usually to register additional checkers.
+load-plugins=pylint.extensions.no_self_use
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+[FORMAT]
+max-line-length=79
+
+[REPORTS]
+#reports=yes
+score=yes
+
+[MESSAGES CONTROL]
+disable=
+    fixme,
+    invalid-name,
+    no-value-for-parameter,
+    pointless-statement,
+    missing-module-docstring,
+    missing-class-docstring,
+    missing-function-docstring,
+    too-many-arguments,
+    too-many-locals,
+    too-many-branches,
+    too-many-instance-attributes,
+    too-many-ancestors,
+    too-many-public-methods,
+    too-many-lines,
+    too-many-nested-blocks,
+    too-many-statements,
+    protected-access,
+    super-init-not-called,
+    useless-object-inheritance,
+    unidiomatic-typecheck,
+    unsubscriptable-object,
+    inconsistent-return-statements,
+    attribute-defined-outside-init,
+    too-few-public-methods,
+    abstract-method,
+    no-self-use,
+    broad-except,
+    unnecessary-lambda,
+    arguments-differ,
+    broad-exception-raised,
+    unspecified-encoding,
+    consider-using-f-string,
+    consider-using-with,
+    consider-using-dict-items,
+    unused-private-member,
diff --git a/src/files/nagios/nrpe_check_ovn_certs.py b/src/files/nagios/nrpe_check_ovn_certs.py
new file mode 100755
index 0000000..9c0edf8
--- /dev/null
+++ b/src/files/nagios/nrpe_check_ovn_certs.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+# Copyright (C) 2023 Canonical
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import sys
+from datetime import datetime, timedelta
+
+NAGIOS_PLUGIN_DATA = '/usr/local/lib/nagios/juju_charm_plugin_data'
+
+
+if __name__ == "__main__":
+    output_path = os.path.join(NAGIOS_PLUGIN_DATA, 'ovn_cert_status.json')
+    if os.path.exists(output_path):
+        with open(output_path) as fd:
+            try:
+                status = json.loads(fd.read())
+                ts = datetime.strptime(status['last_updated'],
+                                       "%Y-%m-%d %H:%M:%S")
+                if datetime.now() - ts > timedelta(days=1):
+                    print("ovn cert check status is more than 24 hours old "
+                          "(last_updated={})".format(status['last_updated']))
+                    sys.exit(1)
+
+                print(status['message'])
+                sys.exit(status['exit_code'])
+            except ValueError:
+                print("invalid check output")
+    else:
+        print("no info available")
+
+    sys.exit(0)
diff --git a/src/files/scripts/check_ovn_certs.py b/src/files/scripts/check_ovn_certs.py
new file mode 100755
index 0000000..420fbdf
--- /dev/null
+++ b/src/files/scripts/check_ovn_certs.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# Copyright (C) 2023 Canonical
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+from datetime import datetime
+
+from cryptography.hazmat.backends import default_backend
+from cryptography import x509
+
+NAGIOS_PLUGIN_DATA = '/usr/local/lib/nagios/juju_charm_plugin_data'
+
+
+class SSLCertificate(object):
+    def __init__(self, path):
+        self.path = path
+
+    @property
+    def cert(self):
+        with open(self.path, "rb") as fd:
+            return fd.read()
+
+    @property
+    def expiry_date(self):
+        cert = x509.load_pem_x509_certificate(self.cert, default_backend())
+        return cert.not_valid_after
+
+    @property
+    def days_remaining(self):
+        return int((self.expiry_date - datetime.utcnow()).days)  # naive UTC
+
+
+def check_ovn_certs():
+    output_path = os.path.join(NAGIOS_PLUGIN_DATA, 'ovn_cert_status.json')
+    if not os.path.isdir(NAGIOS_PLUGIN_DATA):
+        os.makedirs(NAGIOS_PLUGIN_DATA)
+
+    for cert in ['/etc/ovn/cert_host', '/etc/ovn/ovn-central.crt']:
+        if not os.path.exists(cert):
+            message = "cert '{}' does not exist.".format(cert)
+            exit_code = 2
+            break
+
+        if not os.access(cert, os.R_OK):
+            message = "cert '{}' is not readable.".format(cert)
+            exit_code = 2
+            break
+
+        try:
+            remaining_days = SSLCertificate(cert).days_remaining
+            if remaining_days <= 0:
+                message = "{}: cert has expired.".format(cert)
+                exit_code = 2
+                break
+
+            if remaining_days < 10:
+                message = ("{}: cert will expire soon (less than 10 days).".
+                           format(cert))
+                exit_code = 1
+                break
+        except Exception as exc:
+            message = "failed to check cert '{}': {}".format(cert, str(exc))
+            exit_code = 1
+            break  # stop here so the for/else cannot report healthy
+    else:
+        message = "all certs healthy"
+        exit_code = 0
+
+    ts = datetime.now()
+    with open(output_path, 'w') as fd:
+        fd.write(json.dumps({'message': message,
+                             'exit_code': exit_code,
+                             'last_updated':
+                             "{}-{}-{} {}:{}:{}".format(ts.year, ts.month,
+                                                        ts.day, ts.hour,
+                                                        ts.minute,
+                                                        ts.second)}))
+
+    os.chmod(output_path, 0o644)  # octal literal: rw-r--r--
+
+
+if __name__ == "__main__":
+    check_ovn_certs()
diff --git a/src/lib/charm/openstack/ovn_central.py b/src/lib/charm/openstack/ovn_central.py
index 4c8c81e..c8cc35e 100644
--- a/src/lib/charm/openstack/ovn_central.py
+++ b/src/lib/charm/openstack/ovn_central.py
@@ -19,6 +19,7 @@ import subprocess
 import time
 
 import charmhelpers.core as ch_core
+from charmhelpers.core.host import rsync, write_file
 import charmhelpers.contrib.charmsupport.nrpe as nrpe
 import charmhelpers.contrib.network.ovs.ovn as ch_ovn
 import charmhelpers.contrib.network.ovs.ovsdb as ch_ovsdb
@@ -37,6 +38,10 @@ from charms.layer import snap
 # bus discovery and action exection
 charms_openstack.charm.use_defaults('charm.default-select-release')
 
+NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
+SCRIPTS_DIR = '/usr/local/bin'
+CERTCHECK_CRONFILE = '/etc/cron.d/ovn-central-cert-checks'
+CRONJOB_CMD = "{schedule} root {command} 2>&1 | logger -p local0.notice\n"
 
 PEER_RELATION = 'ovsdb-peer'
 CERT_RELATION = 'certificates'
@@ -769,10 +774,33 @@ class BaseOVNCentralCharm(charms_openstack.charm.OpenStackCharm):
         hostname = nrpe.get_nagios_hostname()
         current_unit = nrpe.get_nagios_unit_name()
         charm_nrpe = nrpe.NRPE(hostname=hostname)
+        self.add_nrpe_certs_check(charm_nrpe)
         nrpe.add_init_service_checks(
             charm_nrpe, self.nrpe_check_services, current_unit)
         charm_nrpe.write()
 
+    def add_nrpe_certs_check(self, charm_nrpe):
+        script = 'nrpe_check_ovn_certs.py'
+        src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios', script)
+        dst = os.path.join(NAGIOS_PLUGINS, script)
+        rsync(src, dst)
+        charm_nrpe.add_check(
+            shortname='check_ovn_certs',
+            description='Check that ovn certs are valid.',
+            check_cmd=script
+        )
+        # Need to install this as a system package since it is needed by the
+        # cron script that runs outside of the charm.
+        ch_fetch.apt_install(['python3-cryptography'])
+        script = 'check_ovn_certs.py'
+        src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'scripts', script)
+        dst = os.path.join(SCRIPTS_DIR, script)
+        rsync(src, dst)
+        cronjob = CRONJOB_CMD.format(
+            schedule='*/5 * * * *',
+            command=dst)
+        write_file(CERTCHECK_CRONFILE, cronjob)
+
     def custom_assess_status_check(self):
         """Report deferred events in charm status message."""
         state = None
diff --git a/tox.ini b/tox.ini
index c2be05e..b750191 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,7 +5,7 @@
 # https://github.com/openstack-charmers/release-tools
 
 [tox]
-envlist = pep8,py3
+envlist = pep8,pylint,py3
 # NOTE: Avoid build/test env pollution by not enabling sitepackages.
 sitepackages = False
 # NOTE: Avoid false positives by not skipping missing interpreters.
@@ -63,6 +63,15 @@ deps = flake8==3.9.2
        git+https://github.com/juju/charm-tools.git
 commands = flake8 {posargs} src unit_tests
 
+# This is added for manual testing and is not run by the gate.
+[testenv:pylint]
+deps =
+    pylint==2.17.4
+    cryptography
+commands = pylint -v --rcfile={toxinidir}/pylintrc \
+    {toxinidir}/src/files/nagios/nrpe_check_ovn_certs.py \
+    {toxinidir}/src/files/scripts/check_ovn_certs.py
+
 [testenv:cover]
 # Technique based heavily upon
 # https://github.com/openstack/nova/blob/master/tox.ini
diff --git a/unit_tests/test_lib_charms_ovn_central.py b/unit_tests/test_lib_charms_ovn_central.py
index 28e1830..9bac6be 100644
--- a/unit_tests/test_lib_charms_ovn_central.py
+++ b/unit_tests/test_lib_charms_ovn_central.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 import collections
 import io
+import tempfile
 import unittest.mock as mock
 
 import charms_openstack.test_utils as test_utils
@@ -606,20 +608,23 @@ class TestOVNCentralCharm(Helper):
         ])
 
     def test_render_nrpe(self):
-        self.patch_object(ovn_central.nrpe, 'NRPE')
-        self.patch_object(ovn_central.nrpe, 'add_init_service_checks')
-        self.target.render_nrpe()
-        # Note that this list is valid for Ussuri
-        self.add_init_service_checks.assert_has_calls([
-            mock.call().add_init_service_checks(
-                mock.ANY,
-                ['ovn-northd', 'ovn-ovsdb-server-nb', 'ovn-ovsdb-server-sb'],
-                mock.ANY
-            ),
-        ])
-        self.NRPE.assert_has_calls([
-            mock.call().write(),
-        ])
+        with tempfile.TemporaryDirectory() as dtmp:
+            os.environ['CHARM_DIR'] = dtmp
+            self.patch_object(ovn_central.nrpe, 'NRPE')
+            self.patch_object(ovn_central.nrpe, 'add_init_service_checks')
+            self.target.render_nrpe()
+            # Note that this list is valid for Ussuri
+            self.add_init_service_checks.assert_has_calls([
+                mock.call().add_init_service_checks(
+                    mock.ANY,
+                    ['ovn-northd', 'ovn-ovsdb-server-nb',
+                     'ovn-ovsdb-server-sb'],
+                    mock.ANY
+                ),
+            ])
+            self.NRPE.assert_has_calls([
+                mock.call().write(),
+            ])
 
     def test_configure_deferred_restarts(self):
         self.patch_object(