Add ovn cert nrpe check

The certs are readable only by root, so a cron job (running as root)
performs the check and saves the result to a state file, which an NRPE
check then reads and reports back to Nagios.

Closes-Bug: #1979539
Change-Id: Ia6df3fc2e3bc0d64ef2128ed65959a5d2d9b3d62
This commit is contained in:
Edward Hope-Morley 2023-09-25 16:27:48 +01:00
parent 03fbf73297
commit fec04c22a8
8 changed files with 222 additions and 1 deletions

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Canonical
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import sys
from datetime import datetime, timedelta
# Directory holding the status file this check reads.
NAGIOS_PLUGIN_DATA = '/usr/local/lib/nagios/juju_charm_plugin_data'
# Exit codes returned to the caller of this script.
WARN = 1
SUCCESS = 0


def evaluate_status(raw, now=None):
    """Turn the content of the saved status file into a check result.

    :param raw: text content of the status file. It is expected to be a JSON
                object with 'message', 'exit_code' and 'last_updated' keys.
    :param now: datetime used to decide whether the status is stale.
                Defaults to the current local time.
    :returns: tuple of (message to print, exit code to exit with).
    """
    if now is None:
        now = datetime.now()

    try:
        status = json.loads(raw)
        last_updated = status['last_updated']
        ts = datetime.strptime(last_updated, "%Y-%m-%d %H:%M:%S")
        if now - ts > timedelta(days=1):
            return ("ovn cert check status is more than 24 hours old "
                    "(last_updated={})".format(last_updated), WARN)

        # int() rejects a null exit code, which sys.exit() would otherwise
        # silently treat as success.
        return status['message'], int(status['exit_code'])
    except (ValueError, KeyError, TypeError):
        # ValueError: not JSON, bad timestamp or non-numeric exit code.
        # KeyError/TypeError: JSON that is not an object with the expected
        # keys and value types.
        return "invalid check output", WARN


if __name__ == "__main__":
    output_path = os.path.join(NAGIOS_PLUGIN_DATA, 'ovn_cert_status.json')
    try:
        # Open directly rather than testing for existence first so that the
        # file disappearing in between cannot cause a traceback.
        with open(output_path) as fd:
            raw_status = fd.read()
    except FileNotFoundError:
        print("no info available")
        sys.exit(WARN)
    except OSError as exc:
        print("unable to read check output: {}".format(exc))
        sys.exit(WARN)

    message, exit_code = evaluate_status(raw_status)
    print(message)
    sys.exit(exit_code)

View File

@ -0,0 +1,97 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Canonical
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
from datetime import datetime
from cryptography.hazmat.backends import default_backend
from cryptography import x509
# Directory in which check_ovn_certs() saves its result (as
# ovn_cert_status.json); created by check_ovn_certs() if missing.
NAGIOS_PLUGIN_DATA = '/usr/local/lib/nagios/juju_charm_plugin_data'
# Exit codes recorded in the saved status.
CRITICAL = 2
WARN = 1
SUCCESS = 0
# A cert with fewer remaining days than this is reported as expiring soon.
CERT_EXPIRY_LIMIT = 60
class SSLCertificate(object):
    """A PEM encoded x509 certificate stored on disk."""

    def __init__(self, path):
        """
        :param path: filesystem path of the PEM encoded certificate.
        """
        self.path = path

    @property
    def cert(self):
        """Raw bytes of the certificate file (re-read on every access)."""
        with open(self.path, "rb") as fd:
            return fd.read()

    @property
    def expiry_date(self):
        """The "not valid after" date parsed from the certificate."""
        cert = x509.load_pem_x509_certificate(self.cert, default_backend())
        return cert.not_valid_after

    @property
    def days_remaining(self):
        """Whole days left until expiry; negative once the cert has expired.

        The expiry date provided by the cryptography library is a naive
        datetime expressed in UTC, so it has to be compared with the current
        time in UTC rather than local time, otherwise the result is skewed
        by the UTC offset of the host.
        """
        from datetime import timezone

        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        return (self.expiry_date - now_utc).days
def check_ovn_certs():
    """Check the OVN certs and save the result to the status file.

    Each cert is checked in turn for existence, readability and remaining
    validity. The first problem found ends the scan and is the one that is
    reported. The result is written as JSON (message, exit_code and
    last_updated) to ovn_cert_status.json under NAGIOS_PLUGIN_DATA, which is
    created if it does not exist.
    """
    output_path = os.path.join(NAGIOS_PLUGIN_DATA, 'ovn_cert_status.json')
    if not os.path.isdir(NAGIOS_PLUGIN_DATA):
        os.makedirs(NAGIOS_PLUGIN_DATA)

    # Assume healthy until one of the certs proves otherwise.
    message = "all certs healthy"
    exit_code = SUCCESS
    for cert in ['/etc/ovn/cert_host', '/etc/ovn/ovn-central.crt']:
        if not os.path.exists(cert):
            message = "cert '{}' does not exist.".format(cert)
            exit_code = CRITICAL
            break

        if not os.access(cert, os.R_OK):
            message = "cert '{}' is not readable.".format(cert)
            exit_code = CRITICAL
            break

        try:
            remaining_days = SSLCertificate(cert).days_remaining
        except Exception as exc:
            message = "failed to check cert '{}': {}".format(cert, str(exc))
            exit_code = WARN
            # Stop scanning: carrying on would let a later healthy cert (or
            # the end of the loop) replace this failure with a success.
            break

        if remaining_days <= 0:
            message = "{}: cert has expired.".format(cert)
            exit_code = CRITICAL
            break

        if remaining_days < CERT_EXPIRY_LIMIT:
            message = ("{}: cert will expire soon (less than {} days).".
                       format(cert, CERT_EXPIRY_LIMIT))
            exit_code = WARN
            break

    ts = datetime.now()
    with open(output_path, 'w') as fd:
        fd.write(json.dumps({'message': message,
                             'exit_code': exit_code,
                             'last_updated':
                             ts.strftime("%Y-%m-%d %H:%M:%S")}))

    # The mode has to be given in octal; decimal 644 is 0o1204, which is not
    # rw-r--r--.
    os.chmod(output_path, 0o644)


if __name__ == "__main__":
    check_ovn_certs()

View File

@ -1,5 +1,6 @@
includes:
- layer:ovn
- interface:nrpe-external-master
- interface:ovsdb
- interface:ovsdb-subordinate
- interface:neutron-plugin

View File

@ -12,10 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from charmhelpers.core.host import rsync, write_file
from charmhelpers.contrib.charmsupport import nrpe
import charmhelpers.fetch as ch_fetch
import charms_openstack.charm as charm
import charms.ovn_charm
# Directory the nrpe check script is copied into.
NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
# Directory the cron-driven cert check script is copied into.
SCRIPTS_DIR = '/usr/local/bin'
# Cron file that schedules the cert check script.
CERTCHECK_CRONFILE = '/etc/cron.d/ovn-chassis-cert-checks'
# Cron entry template: runs {command} as root on {schedule} and pipes its
# stdout and stderr to logger.
CRONJOB_CMD = "{schedule} root {command} 2>&1 | logger -p local0.notice\n"
charm.use_defaults('charm.default-select-release')
@ -30,6 +41,40 @@ class OVNChassisCharm(charms.ovn_charm.DeferredEventMixin,
release = 'ussuri'
name = 'ovn-chassis'
# packages needed by nrpe checks
nrpe_packages = ['python3-cryptography']
# Setting an empty source_config_key activates special handling of release
# selection suitable for subordinate charms
source_config_key = ''
@property
def packages(self):
    """Packages of the parent class followed by those the nrpe checks need."""
    inherited = super().packages
    return inherited + self.nrpe_packages
def render_nrpe(self):
    """Register the cert check, then run the parent's nrpe rendering.

    NOTE(review): nothing visible here calls write() on the NRPE object
    created below - confirm the cert check actually gets persisted.
    """
    charm_nrpe = nrpe.NRPE(hostname=nrpe.get_nagios_hostname())
    self.add_nrpe_certs_check(charm_nrpe)
    super().render_nrpe()
def add_nrpe_certs_check(self, charm_nrpe):
    """Install the cert check scripts and register the nrpe check.

    Copies the nrpe plugin into NAGIOS_PLUGINS and adds it as a check on
    charm_nrpe, then installs the cron-driven checker script into
    SCRIPTS_DIR together with the cron file that runs it every 15 minutes.

    :param charm_nrpe: object on which add_check() is called to register
                       the check.
    """
    files_dir = os.path.join(os.getenv('CHARM_DIR'), 'files')

    nrpe_plugin = 'nrpe_check_ovn_certs.py'
    rsync(os.path.join(files_dir, 'nagios', nrpe_plugin),
          os.path.join(NAGIOS_PLUGINS, nrpe_plugin))
    charm_nrpe.add_check(
        check_cmd=nrpe_plugin,
        description='Check that ovn certs are valid.',
        shortname='check_ovn_certs',
    )

    # The cron script runs outside of the charm, hence its dependency has to
    # be available as a system package.
    ch_fetch.apt_install(['python3-cryptography'])

    cron_script = 'check_ovn_certs.py'
    cron_script_path = os.path.join(SCRIPTS_DIR, cron_script)
    rsync(os.path.join(files_dir, 'scripts', cron_script), cron_script_path)
    write_file(CERTCHECK_CRONFILE,
               CRONJOB_CMD.format(command=cron_script_path,
                                  schedule='*/15 * * * *'))

View File

@ -21,6 +21,9 @@ provides:
ovsdb-subordinate:
interface: ovsdb-subordinate
scope: container
nrpe-external-master:
interface: nrpe-external-master
scope: container
requires:
juju-info:
interface: juju-info

View File

@ -27,3 +27,15 @@ def enable_ovn_chassis_handlers():
def configure_deferred_restarts():
    """Ask the charm instance to configure deferred restarts."""
    with charm.provide_charm_instance() as charm_instance:
        charm_instance.configure_deferred_restarts()
@reactive.when_none('charm.paused', 'is-update-status-hook')
@reactive.when('config.rendered')
@reactive.when_any(
    'config.changed.nagios_context',
    'config.changed.nagios_servicegroups',
    'endpoint.nrpe-external-master.changed',
    'nrpe-external-master.available',
)
def configure_nrpe():
    """Render the NRPE checks when nagios config or the nrpe relation changes.

    Skipped while the charm is paused or during the update-status hook.
    """
    with charm.provide_charm_instance() as instance:
        instance.render_nrpe()

View File

@ -61,7 +61,9 @@ commands = stestr run --slowest {posargs}
basepython = python3
deps = flake8==3.9.2
git+https://github.com/juju/charm-tools.git
commands = flake8 {posargs} src unit_tests
commands = flake8 {posargs} src unit_tests \
{toxinidir}/src/files/nagios/nrpe_check_ovn_certs.py \
{toxinidir}/src/files/scripts/check_ovn_certs.py
[testenv:cover]
# Technique based heavily upon

View File

@ -24,10 +24,22 @@ class TestRegisteredHooks(test_utils.TestRegisteredHooks):
def test_hooks(self):
hook_set = {
'when_none': {
'configure_nrpe': ('charm.paused', 'is-update-status-hook',),
},
'when_not': {
'enable_ovn_chassis_handlers': ('MOCKED_FLAG',),
'configure_deferred_restarts': ('is-update-status-hook',),
},
'when': {
'configure_nrpe': ('config.rendered',),
},
'when_any': {
'configure_nrpe': ('config.changed.nagios_context',
'config.changed.nagios_servicegroups',
'endpoint.nrpe-external-master.changed',
'nrpe-external-master.available',),
},
}
# test that the hooks were registered via the
# reactive.ovn_handlers