Added NRPE checks for manila-ganesha charm.

- Added three basic checks covering the NFS connection, NFS services, and
  NFS shares.
- Updated the test bundles to include nrpe and nrpe-external-master for
  functional tests.

func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/806

Closes-bug: #1925975
Change-Id: I4d3736300f75b9811f4f6525b5454c7e495ef566
This commit is contained in:
Chi Wai, Chan 2022-06-28 14:56:50 +08:00
parent bb2a7ed8c8
commit 65ca408318
13 changed files with 480 additions and 2 deletions

View File

@ -14,6 +14,22 @@ options:
Note that updating this setting to a source that is known to
provide a later version of OpenStack will trigger a software
upgrade.
nagios_context:
default: "juju"
type: string
description: |
Used by the nrpe subordinate charms.
A string that will be prepended to instance name to set the host name
in nagios. So for instance the hostname would be something like:
juju-myservice-0
If you're running multiple environments with the same services in them
this allows you to differentiate between them.
nagios_servicegroups:
default: ""
type: string
description: |
A comma-separated list of nagios servicegroups. If left empty, the
nagios_context will be used as the servicegroup.
rabbit-user:
default: manila
type: string

View File

@ -0,0 +1,75 @@
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import subprocess
from argparse import ArgumentParser
# Default TCP port probed when --port is not given.
PORT = 2049
# Default host to probe: the output of the `hostname` command. The command is
# passed as an argument list, so no shell is spawned for a fixed command.
HOSTNAME = subprocess.check_output(["hostname"]).decode().strip("\n")
# Command template for the TCP probe; filled with (host, port).
CHECK_TCP = "/usr/lib/nagios/plugins/check_tcp -H {} -p {}"
# Exit code -> label printed in the status line.
SERVICE_STATUS = {
    0: "OKAY",
    1: "WARNING",
    2: "CRITICAL",
    3: "UNKNOWN",
}
def main(args):
    """Probe the NFS TCP port with check_tcp and exit with its status.

    Args:
        args: parsed command-line namespace providing ``domain`` (the host
            to probe) and ``port`` (the TCP port).

    Prints a single ``NFS service <STATUS>: <message>`` line and terminates
    the process through ``sys.exit`` with the matching exit code. Never
    returns.
    """
    import shlex  # local import: only needed to build the argv safely

    exit_code = 0
    message = ""
    try:
        # Quote the host before filling the template, then split the result
        # back into an argv list. The command runs without a shell, so the
        # host value can never be interpreted as shell syntax.
        command = shlex.split(
            CHECK_TCP.format(shlex.quote(str(args.domain)), args.port)
        )
        message = subprocess.check_output(
            command,
            stderr=subprocess.STDOUT,
        ).decode().strip()
    except subprocess.CalledProcessError as e:
        # Propagate the plugin's exit code only when it is a known status;
        # anything else would make the SERVICE_STATUS lookup below fail.
        exit_code = e.returncode if e.returncode in SERVICE_STATUS else 3
        # The captured output is bytes; decode it so the status line does
        # not show a b'...' repr.
        message = (e.stdout or b"").decode(errors="replace").strip()
    except Exception as e:
        # Includes the case where the check_tcp binary cannot be executed.
        exit_code = 3
        message = e
    print(f"NFS service {SERVICE_STATUS[exit_code]}: {message}")
    sys.exit(exit_code)
def parse_cli():
    """Parse the command line and return the resulting namespace.

    Two optional flags are accepted: ``--domain`` (string, defaults to
    HOSTNAME) and ``--port`` (integer, defaults to PORT).
    """
    cli = ArgumentParser()
    option_specs = (
        ("--domain", str, HOSTNAME, "Set the NFS domain."),
        ("--port", int, PORT, "Set the NFS port."),
    )
    for flag, value_type, fallback, help_text in option_specs:
        cli.add_argument(
            flag,
            type=value_type,
            default=fallback,
            help=help_text,
        )
    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: parse the options and run the check.
    main(parse_cli())

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import subprocess
from pathlib import Path
# Shell pipeline: list the exports, keep the first column and drop the first
# output line (presumably showmount's header line).
CHECK_EXPORTS = (
    "/usr/sbin/showmount -e"
    " | awk '{ print $1 }'"
    " | sed '1,1d'"
)
# Exit code -> label printed in the status line.
SERVICE_STATUS = dict(enumerate(("OKAY", "WARNING", "CRITICAL", "UNKNOWN")))
def main():
    """Check that every NFS export reported by showmount exists locally.

    Runs the CHECK_EXPORTS pipeline, prints a single
    ``NFS service <STATUS>: <message>`` line and terminates the process
    through ``sys.exit``. Never returns. Exit codes:

    * 0 when every exported path exists;
    * 3 when nothing is exported, when an exported path is missing, or when
      an unexpected error occurs;
    * the pipeline's own return code when it fails and that code is a known
      status (3 otherwise).
    """
    exit_code = 0
    message = ""
    try:
        # Run command.
        output = subprocess.check_output(
            CHECK_EXPORTS,
            stderr=subprocess.STDOUT,
            shell=True,
            executable="/bin/bash",
        ).decode().strip()
        if not output:
            exit_code = 3
            message = "no export filesystem."
        else:
            # Get a list of export directories.
            exports = [Path(line) for line in output.split("\n")]
            # Get a list of inaccessible export directories.
            bad_exports = [
                str(export) for export in exports if not export.exists()
            ]
            if bad_exports:
                exit_code = 3
                message = f"Export {', '.join(bad_exports)} does not exist."
            else:
                # Name the exports that were verified; bad_exports is
                # always empty on this branch.
                checked = ", ".join(str(export) for export in exports)
                message = f"Export {checked} are okay."
    except subprocess.CalledProcessError as e:
        # Keep the return code only when it is a known status; anything
        # else would make the SERVICE_STATUS lookup below fail.
        exit_code = e.returncode if e.returncode in SERVICE_STATUS else 3
        # The captured output is bytes; decode it for a readable message.
        message = (e.stdout or b"").decode(errors="replace").strip()
    except Exception as e:
        exit_code = 3
        message = e
    print(f"NFS service {SERVICE_STATUS[exit_code]}: {message}")
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import subprocess
# Shell pipeline: take the fifth column of rpcinfo's output, drop the first
# line and reduce the rest to a sorted list of unique values.
CHECK_CMD = (
    "/usr/sbin/rpcinfo"
    " | awk '{print $5}'"
    " | sed '1,1d'"
    " | sort"
    " | uniq"
)
# Exit code -> label printed in the status line.
SERVICE_STATUS = dict(
    zip(range(4), ["OKAY", "WARNING", "CRITICAL", "UNKNOWN"])
)
def main():
    """Check that the RPC processes used by NFS are listed by rpcinfo.

    Runs the CHECK_CMD pipeline, prints a single
    ``NFS service <STATUS>: <message>`` line and terminates the process
    through ``sys.exit``. Never returns. Exit codes:

    * 0 when every required name is present;
    * 2 when at least one required name is missing;
    * the pipeline's own return code when it fails and that code is a known
      status (3 otherwise);
    * 3 on any other error.
    """
    # List of RPC processes used by NFS
    required_procs = {"nfs", "mountd", "portmapper", "nlockmgr", "rquotad"}
    exit_code = 0
    message = ""
    try:
        # Run command.
        output = subprocess.check_output(
            CHECK_CMD,
            stderr=subprocess.STDOUT,
            shell=True,
            executable="/bin/bash",
        ).decode().strip()
        # Get the set of RPC processes (one name per output line).
        procs = set(output.split())
        # Sort the missing names so the message is stable between runs.
        missing_procs = sorted(required_procs - procs)
        if missing_procs:
            exit_code = 2
            message = (
                f"RPC processes "
                f"{', '.join(missing_procs)} "
                f"not running which are required by NFS."
            )
        else:
            message = "All RPC processes needed by NFS are running."
    except subprocess.CalledProcessError as e:
        # Keep the return code only when it is a known status; anything
        # else would make the SERVICE_STATUS lookup below fail.
        exit_code = e.returncode if e.returncode in SERVICE_STATUS else 3
        # The captured output is bytes; decode it for a readable message.
        message = (e.stdout or b"").decode(errors="replace").strip()
    except Exception as e:
        exit_code = 3
        message = e
    print(f"NFS service {SERVICE_STATUS[exit_code]}: {message}")
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

View File

@ -6,6 +6,7 @@ includes:
- interface:rabbitmq
- interface:keystone-credentials
- interface:manila-plugin
- interface:nrpe-external-master
options:
basic:
use_venv: True

View File

@ -40,6 +40,12 @@ from charmhelpers.contrib.storage.linux.ceph import (
)
import charmhelpers.core as ch_core
from lib.nfs_ganesha_nrpe import (
install_nrpe_checks,
install_nrpe_plugins,
remove_nrpe_plugins,
remove_nrpe_checks,
)
MANILA_DIR = '/etc/manila/'
MANILA_CONF = MANILA_DIR + "manila.conf"
@ -405,6 +411,18 @@ class ManilaGaneshaCharm(charms_openstack.charm.HAOpenStackCharm,
'client': ch_core.hookenv.application_name()})
ceph.send_request_if_needed(rq)
def install_nrpe_checks(self, enable_cron=True):
    """Install the NRPE checks through ``lib.nfs_ganesha_nrpe``.

    Thin wrapper around the module-level ``install_nrpe_checks`` imported
    at the top of this file; note that ``enable_cron`` defaults to True
    here.

    :param enable_cron: forwarded unchanged to the library function.
    :returns: whatever the library function returns.
    """
    return install_nrpe_checks(enable_cron=enable_cron)
def remove_nrpe_checks(self):
    """Remove the NRPE checks through ``lib.nfs_ganesha_nrpe``.

    Thin wrapper around the module-level ``remove_nrpe_checks`` imported
    at the top of this file. Returns None.
    """
    remove_nrpe_checks()
def install_nrpe_plugins(self):
    """Install the NRPE plugin files through ``lib.nfs_ganesha_nrpe``.

    Thin wrapper around the module-level ``install_nrpe_plugins`` imported
    at the top of this file.

    :returns: whatever the library function returns.
    """
    return install_nrpe_plugins()
def remove_nrpe_plugins(self):
    """Remove the NRPE plugin files through ``lib.nfs_ganesha_nrpe``.

    Thin wrapper around the module-level ``remove_nrpe_plugins`` imported
    at the top of this file. Returns None.
    """
    remove_nrpe_plugins()
class ManilaGaneshaUssuriCharm(ManilaGaneshaCharm,
):

196
src/lib/nfs_ganesha_nrpe.py Normal file
View File

@ -0,0 +1,196 @@
# Copyright (C) 2022 Canonical
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
from pathlib import Path
from glob import glob
import charmhelpers.core.hookenv as hookenv
from charmhelpers.contrib.charmsupport import nrpe
# Location of the plugin sources, relative to the charm root directory.
PLUGINS_DIR = "files/plugins"

# One entry per NRPE check: a short name, a human-readable description and
# the command to run. Each command is the "check_<shortname>" file under
# /usr/local/lib/nagios/plugins.
CHECK_SCRIPTS = [
    {
        "shortname": shortname,
        "description": description,
        "check_cmd": f"/usr/local/lib/nagios/plugins/check_{shortname}",
    }
    for shortname, description in (
        ("nfs_conn", "NFS server listening check."),
        ("nfs_exports", "NFS exports check."),
        ("nfs_services", "NFS services check."),
    )
]
class CustomNRPE(nrpe.NRPE):
    """NRPE helper that also manages custom plugin files and cron jobs.

    Extends ``nrpe.NRPE`` with helpers that copy plugin scripts from the
    charm's PLUGINS_DIR into NRPE_PLUGINS_DIR and that write cron files
    which run a check every five minutes and store its output in the NRPE
    home directory.

    Installed artefacts are recorded in the class-level registries
    ``installed_plugins`` and ``installed_cronjobs``. These live in process
    memory only, so a new process starts with both empty.

    NOTE(review): cron jobs are removed solely from ``installed_cronjobs``;
    jobs written by an earlier process are therefore not removed by
    ``remove_all_custom_cronjobs`` - confirm whether that is acceptable.
    """

    # Target installation paths
    NRPE_PLUGINS_DIR = Path("/usr/local/lib/nagios/plugins")
    CROND_DIR = Path("/etc/cron.d")

    # Registries shared by every instance; values are pathlib.Path objects.
    installed_plugins = set()
    installed_cronjobs = {}

    def __init__(self, hostname=None, primary=True):
        super().__init__(hostname=hostname, primary=primary)

    def install_all_custom_plugins(self):
        """Install every plugin file found in the charm's PLUGINS_DIR."""
        search_dir = Path(hookenv.charm_dir(), PLUGINS_DIR)
        for src in glob(str(search_dir / "*")):
            # Only regular files are plugins; copying a directory would fail.
            if os.path.isfile(src):
                self.install_custom_plugin(src)

    def remove_all_custom_plugins(self):
        """Remove every plugin this charm installed.

        Besides the in-memory registry, installed copies of the plugins
        shipped in PLUGINS_DIR are looked up on disk, so plugins installed
        by an earlier process are removed as well.
        """
        targets = set(CustomNRPE.installed_plugins)
        search_dir = Path(hookenv.charm_dir(), PLUGINS_DIR)
        for src in glob(str(search_dir / "*")):
            candidate = CustomNRPE.NRPE_PLUGINS_DIR / Path(src).name
            if candidate.is_file():
                targets.add(candidate)
        for dst in sorted(targets):
            self.remove_custom_plugin(dst)

    def install_custom_plugin(self, src):
        """Copy one plugin into NRPE_PLUGINS_DIR and make it executable.

        :param src: path of the plugin source file.
        :returns: pathlib.Path of the installed plugin.
        :raises Exception: any error from creating the directory, copying,
            chmod or chown is logged and re-raised.
        """
        try:
            # Without the directory, shutil.copy would create a plain file
            # at the directory path instead of a file inside it.
            CustomNRPE.NRPE_PLUGINS_DIR.mkdir(parents=True, exist_ok=True)
            dst = Path(shutil.copy(src, CustomNRPE.NRPE_PLUGINS_DIR))
            os.chmod(dst, 0o755)
            os.chown(dst, 0, 0)
            hookenv.log(
                f"NRPE: Successfully installed {dst}.",
                hookenv.DEBUG
            )
            CustomNRPE.installed_plugins.add(dst)
        except Exception:
            hookenv.log(
                f"NRPE: Failed to install {src}.",
                hookenv.ERROR
            )
            raise
        return dst

    def remove_custom_plugin(self, dst):
        """Delete one installed plugin file and drop it from the registry.

        :param dst: path of the installed plugin.
        :returns: pathlib.Path of the removed plugin.
        :raises Exception: any error from deleting the file is logged and
            re-raised.
        """
        dst = Path(dst)
        try:
            dst.unlink()
            hookenv.log(
                f"NRPE: Successfully removed {dst}.",
                hookenv.DEBUG
            )
            # discard(): the file may have been found on disk without
            # being present in this process's registry.
            CustomNRPE.installed_plugins.discard(dst)
        except Exception:
            hookenv.log(
                f"NRPE: Failed to remove {dst}.",
                hookenv.ERROR
            )
            raise
        return dst

    def install_custom_cronjob(self, command, name):
        """Write a cron file that runs ``command`` every five minutes.

        The command runs as root and its standard output is redirected to
        ``check_<name>.txt`` in the NRPE home directory.

        :param command: command line to schedule.
        :param name: unique job name, used in both file names.
        :raises ValueError: if ``name`` is already registered.
        :raises Exception: any error from writing the cron file is logged
            and re-raised.
        """
        if name in CustomNRPE.installed_cronjobs:
            hookenv.log(
                f"NRPE: Failed to install {name}.",
                hookenv.ERROR
            )
            raise ValueError("cron job name must be unique.")
        cronpath = CustomNRPE.CROND_DIR / f"nagios-check_{name}"
        output_path = f"{super().homedir}/check_{name}.txt"
        cron_file = f"*/5 * * * * root {command} > {output_path}\n"
        try:
            with open(cronpath, "w") as f:
                f.write(cron_file)
            hookenv.log(
                f"cron.d: Successfully installed {cronpath}.",
                hookenv.DEBUG
            )
            CustomNRPE.installed_cronjobs[name] = [
                cronpath,
                Path(output_path),
            ]
        except Exception:
            hookenv.log(
                f"cron.d: Failed to install {cronpath}.",
                hookenv.ERROR
            )
            raise

    def remove_custom_cronjob(self, name):
        """Delete the cron file and output file registered under ``name``.

        :param name: job name used at installation time.
        :raises Exception: a missing registry entry (KeyError) or any error
            from deleting a file is logged and re-raised.
        """
        try:
            for f in CustomNRPE.installed_cronjobs[name]:
                if f.exists():
                    f.unlink()
                    hookenv.log(
                        f"cron.d: Successfully removed {f}.",
                        hookenv.DEBUG
                    )
            CustomNRPE.installed_cronjobs.pop(name)
        except Exception:
            hookenv.log(
                f"cron.d: Failed to remove {name}.",
                hookenv.ERROR
            )
            raise

    def remove_all_custom_cronjobs(self):
        """Remove every cron job recorded in the registry."""
        # Iterate over a snapshot: removal mutates the registry.
        for name in list(CustomNRPE.installed_cronjobs):
            self.remove_custom_cronjob(name)
def install_nrpe_checks(enable_cron=False):
    """Register every check listed in CHECK_SCRIPTS with NRPE.

    When ``enable_cron`` is true, a cron job running the check command is
    installed for each check as well. The NRPE configuration is written
    once all checks have been added.

    Returns a copy of the cron job registry held by CustomNRPE.
    """
    helper = CustomNRPE()
    for entry in CHECK_SCRIPTS:
        name = entry["shortname"]
        command = entry["check_cmd"]
        helper.add_check(
            shortname=name,
            description=entry["description"],
            check_cmd=command,
        )
        if not enable_cron:
            continue
        helper.install_custom_cronjob(command, name)
    helper.write()
    return dict(CustomNRPE.installed_cronjobs)
def remove_nrpe_checks():
    """Unregister every check listed in CHECK_SCRIPTS.

    The registered custom cron jobs are removed afterwards and the NRPE
    configuration is written again.
    """
    helper = CustomNRPE()
    shortnames = (entry["shortname"] for entry in CHECK_SCRIPTS)
    for name in shortnames:
        helper.remove_check(shortname=name)
    helper.remove_all_custom_cronjobs()
    helper.write()
def install_nrpe_plugins():
    """Install every local nagios NRPE plugin shipped in PLUGINS_DIR.

    Returns a copy of the set of installed plugin paths held by CustomNRPE.
    """
    CustomNRPE().install_all_custom_plugins()
    return set(CustomNRPE.installed_plugins)
def remove_nrpe_plugins():
    """Remove the local nagios NRPE plugins tracked by CustomNRPE."""
    CustomNRPE().remove_all_custom_plugins()

View File

@ -18,6 +18,9 @@ extra-bindings:
provides:
manila-plugin:
interface: manila-plugin
nrpe-external-master:
interface: nrpe-external-master
scope: container
requires:
ceph:
interface: ceph-client

View File

@ -9,7 +9,6 @@ import charms.reactive.relations as relations
import charmhelpers.core as ch_core
from charmhelpers.core.hookenv import log
charms_openstack.bus.discover()
# Use the charms.openstack defaults for common states and hooks
@ -146,3 +145,19 @@ def disable_services():
# based on the expectation of multiple units via goal-state
ch_core.host.service('unmask', 'manila-share')
reactive.set_flag('services-disabled')
@reactive.when('nrpe-external-master.available')
def configure_nrpe():
    """Install the NRPE plugin files, then register the NRPE checks.

    Runs while the 'nrpe-external-master.available' flag is set.
    """
    with charm.provide_charm_instance() as instance:
        instance.install_nrpe_plugins()
        instance.install_nrpe_checks()
@reactive.when_not('nrpe-external-master.available')
def remove_nrpe():
    """Remove the NRPE plugin files, then unregister the NRPE checks.

    Runs while the 'nrpe-external-master.available' flag is not set.
    """
    with charm.provide_charm_instance() as instance:
        instance.remove_nrpe_plugins()
        instance.remove_nrpe_checks()

View File

@ -215,6 +215,9 @@ services:
- '23'
channel: yoga/edge
nrpe:
charm: ch:nrpe
relations:
- - 'ceph-mon'
@ -332,3 +335,6 @@ relations:
- - 'nova-cloud-controller:quantum-network-service'
- 'neutron-gateway:quantum-network-service'
- - 'manila-ganesha-az1:nrpe-external-master'
- 'nrpe:nrpe-external-master'

View File

@ -215,6 +215,9 @@ services:
- '23'
channel: yoga/edge
nrpe:
charm: ch:nrpe
relations:
- - 'ceph-mon'
@ -332,3 +335,6 @@ relations:
- - 'nova-cloud-controller:quantum-network-service'
- 'neutron-gateway:quantum-network-service'
- - 'manila-ganesha-az1:nrpe-external-master'
- 'nrpe:nrpe-external-master'

View File

@ -215,6 +215,9 @@ services:
- '23'
channel: yoga/edge
nrpe:
charm: ch:nrpe
relations:
- - 'ceph-mon'
@ -332,3 +335,6 @@ relations:
- - 'nova-cloud-controller:quantum-network-service'
- 'neutron-gateway:quantum-network-service'
- - 'manila-ganesha-az1:nrpe-external-master'
- 'nrpe:nrpe-external-master'

View File

@ -49,6 +49,7 @@ class TestRegisteredHooks(test_utils.TestRegisteredHooks):
'enable_services_in_non_ha': ('config.rendered',
'ganesha-pool-configured',),
'disable_services': ('cluster.connected',),
'configure_nrpe': ('nrpe-external-master.available',),
},
'when_not': {
'ceph_connected': ('ganesha-pool-configured',),
@ -58,11 +59,12 @@ class TestRegisteredHooks(test_utils.TestRegisteredHooks):
'services-started',),
'cluster_connected': ('ha-resources-exposed',),
'disable_services': ('services-disabled',),
'remove_nrpe': ('nrpe-external-master.available',),
},
'when_all': {
'configure_ganesha': ('config.rendered',
'ceph.pools.available',),
}
},
}
# test that the hooks were registered via the
# reactive.manila_ganesha_handlers