Add ovn cert nrpe check

The certs are readable only by root, so a cron job (running as root)
performs the check and saves the result to a state file, which an nrpe
check then reads and reports back to nagios.

Closes-Bug: #1979539
Change-Id: I7c7cd238ddf3fd9f92bfa5879d19d78c091cf2ac
This commit is contained in:
Edward Hope-Morley 2023-06-30 17:27:22 +01:00
parent db9216084f
commit e3502b2c9c
6 changed files with 252 additions and 15 deletions

56
pylintrc Normal file
View File

@ -0,0 +1,56 @@
[MAIN]
jobs=0
ignore=.git
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=pylint.extensions.no_self_use
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
[FORMAT]
max-line-length=79
[REPORTS]
#reports=yes
score=yes
[MESSAGES CONTROL]
disable=
fixme,
invalid-name,
no-value-for-parameter,
pointless-statement,
missing-module-docstring,
missing-class-docstring,
missing-function-docstring,
too-many-arguments,
too-many-locals,
too-many-branches,
too-many-instance-attributes,
too-many-ancestors,
too-many-public-methods,
too-many-lines,
too-many-nested-blocks,
too-many-statements,
protected-access,
super-init-not-called,
useless-object-inheritance,
unidiomatic-typecheck,
unsubscriptable-object,
inconsistent-return-statements,
attribute-defined-outside-init,
too-few-public-methods,
abstract-method,
no-self-use,
broad-except,
unnecessary-lambda,
arguments-differ,
broad-exception-raised,
unspecified-encoding,
consider-using-f-string,
consider-using-with,
consider-using-dict-items,
unused-private-member,

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Canonical
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import sys
from datetime import datetime, timedelta
NAGIOS_PLUGIN_DATA = '/usr/local/lib/nagios/juju_charm_plugin_data'

# Nagios plugin exit codes used by this check.
NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_UNKNOWN = 3


def read_cert_status(output_path):
    """Translate the saved cert-check state file into a Nagios result.

    :param output_path: path of the JSON state file; it is expected to hold
                        the keys 'message', 'exit_code' and 'last_updated'
                        (formatted as "%Y-%m-%d %H:%M:%S").
    :returns: tuple of (message to print, plugin exit code).
    """
    if not os.path.exists(output_path):
        # Kept as OK: the state file is simply absent.
        return "no info available", NAGIOS_OK

    try:
        with open(output_path) as fd:
            status = json.loads(fd.read())
    except OSError as exc:
        return "failed to read check output: {}".format(exc), NAGIOS_UNKNOWN
    except ValueError:
        return "invalid check output", NAGIOS_UNKNOWN

    try:
        last_updated = status['last_updated']
        ts = datetime.strptime(last_updated, "%Y-%m-%d %H:%M:%S")
        if datetime.now() - ts > timedelta(days=1):
            return ("ovn cert check status is more than 24 hours old "
                    "(last_updated={})".format(last_updated),
                    NAGIOS_WARNING)

        return status['message'], int(status['exit_code'])
    except (KeyError, TypeError, ValueError):
        # Missing keys, a non-object JSON document or an unparseable
        # timestamp/exit code: the state cannot be trusted, so report
        # UNKNOWN rather than silently returning OK.
        return "invalid check output", NAGIOS_UNKNOWN


if __name__ == "__main__":
    result_message, result_code = read_cert_status(
        os.path.join(NAGIOS_PLUGIN_DATA, 'ovn_cert_status.json'))
    print(result_message)
    sys.exit(result_code)

View File

@ -0,0 +1,95 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Canonical
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
from datetime import datetime
from cryptography.hazmat.backends import default_backend
from cryptography import x509
NAGIOS_PLUGIN_DATA = '/usr/local/lib/nagios/juju_charm_plugin_data'
class SSLCertificate(object):
    """Accessor for a PEM-encoded X.509 certificate stored on disk.

    The file is re-read on every property access; nothing is cached.
    """

    def __init__(self, path):
        """Remember where the certificate lives.

        :param path: filesystem path of the PEM certificate.
        """
        self.path = path

    @property
    def cert(self):
        """Return the raw bytes of the certificate file."""
        with open(self.path, "rb") as fd:
            return fd.read()

    @property
    def expiry_date(self):
        """Return the certificate's end-of-validity as a naive UTC datetime."""
        cert = x509.load_pem_x509_certificate(self.cert, default_backend())
        # Newer cryptography releases expose a timezone-aware accessor and
        # deprecate the naive one; prefer it when present but keep returning
        # a naive UTC value so existing callers are unaffected.
        expiry = getattr(cert, 'not_valid_after_utc', None)
        if expiry is not None:
            return expiry.replace(tzinfo=None)

        return cert.not_valid_after

    @property
    def days_remaining(self):
        """Return the whole days left before expiry (negative once expired)."""
        from datetime import timezone

        # expiry_date is expressed in UTC, so it must be compared with the
        # current UTC time rather than local wall-clock time.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        return int((self.expiry_date - now_utc).days)
def check_ovn_certs(cert_paths=None, output_dir=None):
    """Check the OVN certificates and save the result for the NRPE check.

    The first problem found ends the scan. The outcome is written as JSON
    (keys 'message', 'exit_code', 'last_updated') to
    ``<output_dir>/ovn_cert_status.json``.

    :param cert_paths: certificate files to check; defaults to the OVN
                       central certificate paths.
    :param output_dir: directory for the state file; defaults to
                       NAGIOS_PLUGIN_DATA. Created if missing.
    """
    if cert_paths is None:
        cert_paths = ['/etc/ovn/cert_host', '/etc/ovn/ovn-central.crt']
    if output_dir is None:
        output_dir = NAGIOS_PLUGIN_DATA

    output_path = os.path.join(output_dir, 'ovn_cert_status.json')
    os.makedirs(output_dir, exist_ok=True)

    for cert in cert_paths:
        if not os.path.exists(cert):
            message = "cert '{}' does not exist.".format(cert)
            exit_code = 2
            break

        if not os.access(cert, os.R_OK):
            message = "cert '{}' is not readable.".format(cert)
            exit_code = 2
            break

        try:
            remaining_days = SSLCertificate(cert).days_remaining
        except Exception as exc:
            message = "failed to check cert '{}': {}".format(cert, str(exc))
            exit_code = 1
            # Stop here so the failure is not replaced by the "healthy"
            # result of the loop's else clause.
            break

        if remaining_days <= 0:
            message = "{}: cert has expired.".format(cert)
            exit_code = 2
            break

        if remaining_days < 10:
            message = ("{}: cert will expire soon (less than 10 days).".
                       format(cert))
            exit_code = 1
            break
    else:
        message = "all certs healthy"
        exit_code = 0

    # Same format the NRPE check parses with strptime.
    last_updated = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(output_path, 'w') as fd:
        fd.write(json.dumps({'message': message,
                             'exit_code': exit_code,
                             'last_updated': last_updated}))

    # Octal mode: the state file must be readable by the (non-root) user
    # that runs the NRPE check.
    os.chmod(output_path, 0o644)


if __name__ == "__main__":
    check_ovn_certs()

View File

@ -19,6 +19,7 @@ import subprocess
import time
import charmhelpers.core as ch_core
from charmhelpers.core.host import rsync, write_file
import charmhelpers.contrib.charmsupport.nrpe as nrpe
import charmhelpers.contrib.network.ovs.ovn as ch_ovn
import charmhelpers.contrib.network.ovs.ovsdb as ch_ovsdb
@ -37,6 +38,10 @@ from charms.layer import snap
# bus discovery and action execution
charms_openstack.charm.use_defaults('charm.default-select-release')
NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
SCRIPTS_DIR = '/usr/local/bin'
CERTCHECK_CRONFILE = '/etc/cron.d/ovn-central-cert-checks'
CRONJOB_CMD = "{schedule} root {command} 2>&1 | logger -p local0.notice\n"
PEER_RELATION = 'ovsdb-peer'
CERT_RELATION = 'certificates'
@ -769,10 +774,33 @@ class BaseOVNCentralCharm(charms_openstack.charm.OpenStackCharm):
hostname = nrpe.get_nagios_hostname()
current_unit = nrpe.get_nagios_unit_name()
charm_nrpe = nrpe.NRPE(hostname=hostname)
self.add_nrpe_certs_check(charm_nrpe)
nrpe.add_init_service_checks(
charm_nrpe, self.nrpe_check_services, current_unit)
charm_nrpe.write()
def add_nrpe_certs_check(self, charm_nrpe):
    """Install the OVN cert NRPE check together with its cron-driven helper.

    Copies the NRPE plugin into NAGIOS_PLUGINS and registers it on
    ``charm_nrpe``, then installs the checker script into SCRIPTS_DIR and
    writes a cron file that runs it every five minutes.

    :param charm_nrpe: NRPE object the 'check_ovn_certs' check is added to.
    """
    files_dir = os.path.join(os.getenv('CHARM_DIR'), 'files')

    # NRPE side: the plugin that reports the saved state.
    plugin_name = 'nrpe_check_ovn_certs.py'
    rsync(os.path.join(files_dir, 'nagios', plugin_name),
          os.path.join(NAGIOS_PLUGINS, plugin_name))
    charm_nrpe.add_check(
        shortname='check_ovn_certs',
        description='Check that ovn certs are valid.',
        check_cmd=plugin_name,
    )

    # The cron script runs outside of the charm, so its dependency has to
    # be available as a system package.
    ch_fetch.apt_install(['python3-cryptography'])

    # Cron side: the script that inspects the certs and saves the state.
    checker_name = 'check_ovn_certs.py'
    checker_path = os.path.join(SCRIPTS_DIR, checker_name)
    rsync(os.path.join(files_dir, 'scripts', checker_name), checker_path)
    write_file(
        CERTCHECK_CRONFILE,
        CRONJOB_CMD.format(schedule='*/5 * * * *', command=checker_path))
def custom_assess_status_check(self):
"""Report deferred events in charm status message."""
state = None

11
tox.ini
View File

@ -5,7 +5,7 @@
# https://github.com/openstack-charmers/release-tools
[tox]
envlist = pep8,py3
envlist = pep8,pylint,py3
# NOTE: Avoid build/test env pollution by not enabling sitepackages.
sitepackages = False
# NOTE: Avoid false positives by not skipping missing interpreters.
@ -63,6 +63,15 @@ deps = flake8==3.9.2
git+https://github.com/juju/charm-tools.git
commands = flake8 {posargs} src unit_tests
# This is added for manual testing and is not run by the gate.
[testenv:pylint]
deps =
pylint==2.17.4
cryptography
commands = pylint -v --rcfile={toxinidir}/pylintrc \
{toxinidir}/src/files/nagios/nrpe_check_ovn_certs.py \
{toxinidir}/src/files/scripts/check_ovn_certs.py
[testenv:cover]
# Technique based heavily upon
# https://github.com/openstack/nova/blob/master/tox.ini

View File

@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import collections
import io
import tempfile
import unittest.mock as mock
import charms_openstack.test_utils as test_utils
@ -606,20 +608,23 @@ class TestOVNCentralCharm(Helper):
])
# Verifies render_nrpe() with the NRPE class and add_init_service_checks
# patched out: the expected service list is asserted and NRPE().write() must
# have been called.
# NOTE(review): this span appears to be a rendered diff hunk (-606,20
# +608,23) that shows the same steps twice - first un-wrapped, then wrapped
# in a temporary CHARM_DIR. Confirm which of the two bodies is current.
def test_render_nrpe(self):
self.patch_object(ovn_central.nrpe, 'NRPE')
self.patch_object(ovn_central.nrpe, 'add_init_service_checks')
self.target.render_nrpe()
# Note that this list is valid for Ussuri
self.add_init_service_checks.assert_has_calls([
mock.call().add_init_service_checks(
mock.ANY,
['ovn-northd', 'ovn-ovsdb-server-nb', 'ovn-ovsdb-server-sb'],
mock.ANY
),
])
self.NRPE.assert_has_calls([
mock.call().write(),
])
# Second variant: CHARM_DIR points at a throw-away directory for the
# duration of the render_nrpe() call.
# NOTE(review): os.environ['CHARM_DIR'] is assigned directly and is not
# restored inside this block, so the value outlives the test; consider
# mock.patch.dict(os.environ, ...) - TODO confirm.
with tempfile.TemporaryDirectory() as dtmp:
os.environ['CHARM_DIR'] = dtmp
self.patch_object(ovn_central.nrpe, 'NRPE')
self.patch_object(ovn_central.nrpe, 'add_init_service_checks')
self.target.render_nrpe()
# Note that this list is valid for Ussuri
self.add_init_service_checks.assert_has_calls([
mock.call().add_init_service_checks(
mock.ANY,
['ovn-northd', 'ovn-ovsdb-server-nb',
'ovn-ovsdb-server-sb'],
mock.ANY
),
])
self.NRPE.assert_has_calls([
mock.call().write(),
])
def test_configure_deferred_restarts(self):
self.patch_object(