NRPE: Monitor threads connected to MySQL.
Add a NRPE check to monitor the number of threads connected to the MySQL database, in proportion to the maximum number of connections. For this check, a nagios user will be created. This user does not have any permissions set, does not have access to any database and can only connect from localhost. Warning and Critical thresholds (in percentage) can be configured. Add an action to reset nagios's password. This action could only be run on the leader unit. Closes-Bug: #1816759 Change-Id: Id35b0331322c2744a9f839b3eb153eed1bc53aac
This commit is contained in:
parent
2962cc9a9a
commit
11ee1ef563
@ -38,6 +38,14 @@ to form its cluster. It is best practice to use this option as doing so ensures
|
||||
that the charm will wait until the cluster is up before accepting relations
|
||||
from other client applications.
|
||||
|
||||
#### `nrpe-threads-connected`
|
||||
|
||||
The `nrpe-threads-connected` option sets Warning and Critical thresholds (in percent)
|
||||
for NRPE check to monitor the number of threads connecting to the MySQL.
|
||||
If the `nrpe-external-master` relationship is set, a nagios user who does
|
||||
not have permission and can only connect from localhost is created before
|
||||
the check is created.
|
||||
|
||||
## Deployment
|
||||
|
||||
To deploy a single percona-cluster unit:
|
||||
|
@ -64,3 +64,8 @@ mysqldump:
|
||||
description: |
|
||||
Comma delimited database names to dump. If left unset, all databases
|
||||
will be dumped.
|
||||
generate-nagios-password:
|
||||
description: |
|
||||
Re-generate the password for the nagios user. This action can only be run on the leader unit,
|
||||
which sets a new password and then the `leader-settings-changed` hook will be activate.
|
||||
|
||||
|
@ -36,6 +36,7 @@ from charmhelpers.core.hookenv import (
|
||||
from charmhelpers.core.host import (
|
||||
CompareHostReleases,
|
||||
lsb_release,
|
||||
pwgen,
|
||||
)
|
||||
|
||||
from charmhelpers.contrib.openstack.utils import (
|
||||
@ -223,6 +224,16 @@ def mysqldump(args):
|
||||
action_fail("mysqldump failed")
|
||||
|
||||
|
||||
def generate_nagios_password(args):
|
||||
"""Regenerate nagios password."""
|
||||
if is_leader():
|
||||
leader_set({"nagios-password": pwgen()})
|
||||
percona_utils.set_nagios_user()
|
||||
action_set({"output": "New password for nagios created successfully."})
|
||||
else:
|
||||
action_fail("This action should only take place on the leader unit.")
|
||||
|
||||
|
||||
# A dictionary of all the defined actions to callables (which take
|
||||
# parsed arguments).
|
||||
ACTIONS = {"pause": pause, "resume": resume, "backup": backup,
|
||||
@ -230,7 +241,8 @@ ACTIONS = {"pause": pause, "resume": resume, "backup": backup,
|
||||
"bootstrap-pxc": bootstrap_pxc,
|
||||
"notify-bootstrapped": notify_bootstrapped,
|
||||
"set-pxc-strict-mode": set_pxc_strict_mode,
|
||||
"mysqldump": mysqldump}
|
||||
"mysqldump": mysqldump,
|
||||
"generate-nagios-password": generate_nagios_password}
|
||||
|
||||
|
||||
def main(args):
|
||||
|
1
actions/generate-nagios-password
Symbolic link
1
actions/generate-nagios-password
Symbolic link
@ -0,0 +1 @@
|
||||
actions.py
|
@ -358,3 +358,10 @@ options:
|
||||
control message will be sent. The fc_limit defaults to 16 transactions.
|
||||
This effectively means that this is as far as a given node can be behind
|
||||
committing transactions from the cluster.
|
||||
nrpe-threads-connected:
|
||||
type: string
|
||||
default: '80,90'
|
||||
description: |
|
||||
This configuration option represents the warning and critical percentages
|
||||
that are used to check the number of threads connected to MySQL.
|
||||
The value should be written as a string containing two numbers separated by commas.
|
||||
|
@ -34,6 +34,7 @@ from charmhelpers.core.hookenv import (
|
||||
DEBUG,
|
||||
INFO,
|
||||
WARNING,
|
||||
ERROR,
|
||||
is_leader,
|
||||
network_get_primary_address,
|
||||
leader_get,
|
||||
@ -57,6 +58,7 @@ from charmhelpers.fetch import (
|
||||
apt_install,
|
||||
add_source,
|
||||
SourceConfigError,
|
||||
filter_installed_packages,
|
||||
)
|
||||
from charmhelpers.contrib.peerstorage import (
|
||||
peer_echo,
|
||||
@ -151,6 +153,9 @@ from percona_utils import (
|
||||
delete_replication_user,
|
||||
list_replication_users,
|
||||
check_mysql_connection,
|
||||
set_nagios_user,
|
||||
get_nrpe_threads_connected_thresholds,
|
||||
MYSQL_NAGIOS_CREDENTIAL_FILE,
|
||||
update_source,
|
||||
ADD_APT_REPOSITORY_FAILED,
|
||||
)
|
||||
@ -1035,16 +1040,40 @@ def leader_elected():
|
||||
'nrpe-external-master-relation-changed')
|
||||
def update_nrpe_config():
|
||||
# python-dbus is used by check_upstart_job
|
||||
apt_install('python-dbus')
|
||||
# nagios-plugins-contrib add pmp-check-mysql-status check
|
||||
packages = filter_installed_packages(["python-dbus",
|
||||
"nagios-plugins-contrib"])
|
||||
apt_install(packages)
|
||||
|
||||
hostname = nrpe.get_nagios_hostname()
|
||||
current_unit = nrpe.get_nagios_unit_name()
|
||||
nrpe_setup = nrpe.NRPE(hostname=hostname)
|
||||
nrpe.add_init_service_checks(nrpe_setup, ['mysql'], current_unit)
|
||||
nrpe_setup.add_check(
|
||||
shortname='mysql_proc',
|
||||
description='Check MySQL process {%s}' % current_unit,
|
||||
description='Check MySQL process {}'.format(current_unit),
|
||||
check_cmd='check_procs -c 1:1 -C mysqld'
|
||||
)
|
||||
try:
|
||||
warning_threads, critical_threads = \
|
||||
get_nrpe_threads_connected_thresholds()
|
||||
except ValueError as error:
|
||||
log("failed to get thresholds from nrpe-threads-connected due: "
|
||||
"{}".format(error), level=ERROR)
|
||||
log("the default thresholds are used")
|
||||
warning_threads, critical_threads = 80, 90
|
||||
|
||||
set_nagios_user()
|
||||
nrpe_setup.add_check(
|
||||
shortname='mysql_threads',
|
||||
description='Check MySQL connected threads',
|
||||
check_cmd='pmp-check-mysql-status --defaults-file {credential_file} '
|
||||
'-x Threads_connected -o / -y max_connections -T pct '
|
||||
'-w {warning} -c {critical}'.format(
|
||||
credential_file=MYSQL_NAGIOS_CREDENTIAL_FILE,
|
||||
warning=warning_threads,
|
||||
critical=critical_threads)
|
||||
)
|
||||
nrpe_setup.write()
|
||||
|
||||
|
||||
|
@ -97,6 +97,7 @@ ADD_APT_REPOSITORY_FAILED = "add-apt-repository-failed"
|
||||
# maintenance mode, but is currently not populated as the
|
||||
# charm_check_function() checks whether the unit is working properly.
|
||||
REQUIRED_INTERFACES = {}
|
||||
MYSQL_NAGIOS_CREDENTIAL_FILE = "/etc/nagios/mysql-check.cnf"
|
||||
|
||||
|
||||
class LeaderNoBootstrapUUIDError(Exception):
|
||||
@ -1053,6 +1054,8 @@ root_password = partial(_get_password, 'root-password')
|
||||
|
||||
sst_password = partial(_get_password, 'sst-password')
|
||||
|
||||
nagios_password = partial(_get_password, 'nagios-password')
|
||||
|
||||
|
||||
def pxc_installed():
|
||||
'''Determine whether percona-xtradb-cluster is installed
|
||||
@ -1744,3 +1747,94 @@ def update_source(source, key=None):
|
||||
else:
|
||||
apt_update() # run without retries
|
||||
log("apt update after adding a new package source")
|
||||
|
||||
|
||||
def get_nrpe_threads_connected_thresholds():
|
||||
"""This function get and verifies threshold values.
|
||||
|
||||
The function confirms these conditions:
|
||||
Thresholds for `nrpe-threads-connected` should be separable by `,` and
|
||||
represented by an integer, where the warning threshold should be
|
||||
in the range [0, 100) and the critical threshold in the range (0, 100).
|
||||
At the same time, the warning threshold should be lower than
|
||||
the critical one.
|
||||
|
||||
:returns: warning threshold, critical threshold
|
||||
:rtype: Tuple[int, int]
|
||||
:raises: ValueError
|
||||
"""
|
||||
values = config("nrpe-threads-connected").split(",")
|
||||
|
||||
if len(values) != 2:
|
||||
raise ValueError("the wrong number of values was set for "
|
||||
"the nrpe-threads-connected")
|
||||
|
||||
try:
|
||||
warning_threshold, critical_threshold = int(values[0]), int(values[1])
|
||||
except ValueError as error:
|
||||
raise error
|
||||
|
||||
if not (0 <= warning_threshold < 100 and 0 < critical_threshold <= 100):
|
||||
raise ValueError("the warning threshold must be in the range [0,100) "
|
||||
"and the critical threshold must be in the range "
|
||||
"(0,100]")
|
||||
|
||||
if warning_threshold >= critical_threshold:
|
||||
raise ValueError("the warning threshold must be less than critical")
|
||||
|
||||
return warning_threshold, critical_threshold
|
||||
|
||||
|
||||
def write_nagios_my_cnf():
|
||||
"""Create a MySQL configuration file with nagios user credentials."""
|
||||
context = {
|
||||
"mysql_user": "nagios", "mysql_passwd": nagios_password(),
|
||||
}
|
||||
render("nagios-my.cnf", MYSQL_NAGIOS_CREDENTIAL_FILE, context,
|
||||
owner="nagios", group="nagios", perms=0o640)
|
||||
|
||||
|
||||
def create_nagios_user():
|
||||
"""Create MySQL user for nagios to check the status of MySQL.
|
||||
|
||||
This user does not have any permissions set, does not have access
|
||||
to any database and can only connect from localhost.
|
||||
|
||||
:raises: OperationalError
|
||||
"""
|
||||
m_helper = get_db_helper()
|
||||
try:
|
||||
m_helper.connect(password=m_helper.get_mysql_root_password())
|
||||
except OperationalError:
|
||||
log("Could not connect to db", level=ERROR)
|
||||
raise
|
||||
# NOTE (rgildein): This user has access to the SHOW GLOBAL STATUS
|
||||
# statement, which does not require any privilege, only the ability to
|
||||
# connect to the server.
|
||||
nagios_exists = False
|
||||
try:
|
||||
# NOTE (rgildein): This method of creation nagios user should be
|
||||
# replaced by the `MySQLHelper.create_user` method after
|
||||
# bug #1905586 has been fixed and merged.
|
||||
nagios_exists = m_helper.select(
|
||||
"SELECT EXISTS(SELECT 1 FROM mysql.user WHERE user = 'nagios')"
|
||||
)
|
||||
m_helper.execute("CREATE USER 'nagios'@'localhost';")
|
||||
except OperationalError as error:
|
||||
if not nagios_exists:
|
||||
log("Creating user nagios failed due: {}".format(error),
|
||||
level="ERROR")
|
||||
raise error
|
||||
else:
|
||||
log("User 'nagios'@'localhost' already exists.", level="WARNING")
|
||||
# NOTE (rgildein): Update the user's password if it has changed.
|
||||
m_helper.execute("ALTER USER 'nagios'@'localhost' IDENTIFIED BY "
|
||||
"'{passwd}';".format(passwd=nagios_password()))
|
||||
|
||||
|
||||
def set_nagios_user():
|
||||
"""Create MySQL user and configuration file with user credentials."""
|
||||
if is_leader():
|
||||
create_nagios_user()
|
||||
|
||||
write_nagios_my_cnf()
|
||||
|
3
templates/nagios-my.cnf
Normal file
3
templates/nagios-my.cnf
Normal file
@ -0,0 +1,3 @@
|
||||
[client]
|
||||
user="{{ mysql_user }}"
|
||||
password="{{ mysql_passwd }}"
|
@ -105,3 +105,35 @@ class MainTestCase(CharmTestCase):
|
||||
with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
|
||||
actions.main(["foo"])
|
||||
self.assertEqual(dummy_calls, ["Action foo failed: uh oh"])
|
||||
|
||||
|
||||
class NagiosTestCase(CharmTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(NagiosTestCase, self).setUp(actions,
|
||||
["action_set",
|
||||
"action_fail",
|
||||
"is_leader",
|
||||
"leader_set",
|
||||
"pwgen",
|
||||
])
|
||||
|
||||
@patch.object(actions.percona_utils, "set_nagios_user")
|
||||
def test_generate_nagios_password(self, mock_set_nagios_user):
|
||||
"""Test regenerate new password for nagios user."""
|
||||
self.is_leader.return_value = True
|
||||
self.pwgen.return_value = "1234"
|
||||
actions.generate_nagios_password([])
|
||||
self.leader_set.assert_called_once_with({"nagios-password": "1234"})
|
||||
mock_set_nagios_user.assert_called_once_with()
|
||||
self.action_set.assert_called_once_with(
|
||||
{"output": "New password for nagios created successfully."}
|
||||
)
|
||||
|
||||
def test_generate_nagios_password_no_leader(self):
|
||||
"""Test regenerate new password for nagios user at no leader unit."""
|
||||
self.is_leader.return_value = False
|
||||
actions.generate_nagios_password([])
|
||||
self.action_fail.assert_called_once_with(
|
||||
"This action should only take place on the leader unit."
|
||||
)
|
||||
|
@ -266,15 +266,45 @@ class TestNRPERelation(CharmTestCase):
|
||||
def setUp(self):
|
||||
patch_targets_nrpe = TO_PATCH[:]
|
||||
patch_targets_nrpe.remove("update_nrpe_config")
|
||||
patch_targets_nrpe.append("nrpe")
|
||||
patch_targets_nrpe.append("apt_install")
|
||||
patch_targets_nrpe.extend(["nrpe",
|
||||
"apt_install",
|
||||
"config",
|
||||
"set_nagios_user"])
|
||||
CharmTestCase.setUp(self, hooks, patch_targets_nrpe)
|
||||
|
||||
def test_mysql_monitored(self):
|
||||
@mock.patch("percona_utils.config")
|
||||
@mock.patch("percona_utils.is_leader")
|
||||
@mock.patch("percona_utils.leader_get")
|
||||
def test_mysql_monitored(self,
|
||||
mock_leader_get,
|
||||
mock_is_leader,
|
||||
mock_config):
|
||||
"""The mysql service is monitored by Nagios."""
|
||||
self.nrpe.get_nagios_unit_name.return_value = "nagios-0"
|
||||
self.test_config.set("nrpe-threads-connected", "80,90")
|
||||
mock_config.side_effect = self.test_config.get
|
||||
mock_is_leader.return_value = True
|
||||
mock_leader_get.return_value = "1234"
|
||||
nrpe_setup = mock.MagicMock()
|
||||
nrpe_setup.add_check = mock.MagicMock()
|
||||
self.nrpe.NRPE.return_value = nrpe_setup
|
||||
|
||||
hooks.update_nrpe_config()
|
||||
self.nrpe.add_init_service_checks.assert_called_once_with(
|
||||
mock.ANY, ["mysql"], mock.ANY)
|
||||
nrpe_setup.add_check.assert_has_calls([
|
||||
mock.call(
|
||||
shortname="mysql_proc",
|
||||
description="Check MySQL process nagios-0",
|
||||
check_cmd="check_procs -c 1:1 -C mysqld"),
|
||||
mock.call(
|
||||
shortname="mysql_threads",
|
||||
description="Check MySQL connected threads",
|
||||
check_cmd="pmp-check-mysql-status "
|
||||
"--defaults-file /etc/nagios/mysql-check.cnf "
|
||||
"-x Threads_connected -o / -y max_connections "
|
||||
"-T pct -w 80 -c 90")
|
||||
])
|
||||
|
||||
|
||||
class TestMasterRelation(CharmTestCase):
|
||||
|
@ -17,6 +17,7 @@ class UtilsTests(CharmTestCase):
|
||||
TO_PATCH = [
|
||||
'config',
|
||||
'kv',
|
||||
'is_leader',
|
||||
'leader_get',
|
||||
'log',
|
||||
'relation_ids',
|
||||
@ -538,6 +539,74 @@ class UtilsTests(CharmTestCase):
|
||||
|
||||
mock_apt_update.assert_not_called()
|
||||
|
||||
@mock.patch.object(percona_utils, "get_db_helper")
|
||||
@mock.patch.object(percona_utils, "write_nagios_my_cnf")
|
||||
def test_create_nagios_user(self,
|
||||
mock_create_nagios_mysql_credential,
|
||||
mock_get_db_helper):
|
||||
my_mock = mock.Mock()
|
||||
self.leader_get.return_value = "1234"
|
||||
self.is_leader.return_value = True
|
||||
mock_get_db_helper.return_value = my_mock
|
||||
|
||||
percona_utils.create_nagios_user()
|
||||
my_mock.select.assert_called_once_with(
|
||||
"SELECT EXISTS(SELECT 1 FROM mysql.user WHERE user = 'nagios')"
|
||||
)
|
||||
my_mock.execute.assert_has_calls([
|
||||
mock.call("CREATE USER 'nagios'@'localhost';"),
|
||||
mock.call("ALTER USER 'nagios'@'localhost' IDENTIFIED BY '1234';"),
|
||||
])
|
||||
|
||||
class OperationalError(Exception):
|
||||
pass
|
||||
|
||||
percona_utils.OperationalError = OperationalError
|
||||
|
||||
def mysql_create_user(*args, **kwargs):
|
||||
raise OperationalError()
|
||||
|
||||
my_mock.select.return_value = True
|
||||
my_mock.execute.side_effect = mysql_create_user
|
||||
with self.assertRaises(OperationalError):
|
||||
percona_utils.create_nagios_user()
|
||||
|
||||
def test_get_nrpe_threads_connected_thresholds(self):
|
||||
"""Test function for getting and verifying threshold values."""
|
||||
self.config.return_value = "a,1,2"
|
||||
with self.assertRaises(ValueError) as context:
|
||||
percona_utils.get_nrpe_threads_connected_thresholds()
|
||||
self.assertEqual(ValueError("the wrong number of values was set "
|
||||
"for the nrpe-threads-connected"),
|
||||
context.exception)
|
||||
|
||||
self.config.return_value = "a,1"
|
||||
with self.assertRaises(ValueError) as context:
|
||||
percona_utils.get_nrpe_threads_connected_thresholds()
|
||||
self.assertEqual(
|
||||
ValueError("invalid literal for int() with base 10: 'a'"),
|
||||
context.exception)
|
||||
|
||||
self.config.return_value = "50,200"
|
||||
with self.assertRaises(ValueError) as context:
|
||||
percona_utils.get_nrpe_threads_connected_thresholds()
|
||||
self.assertEqual(ValueError("the warning threshold must be in the "
|
||||
"range [0,100) and the critical "
|
||||
"threshold must be in the range "
|
||||
"(0,100]"),
|
||||
context.exception)
|
||||
|
||||
self.config.return_value = "90,60"
|
||||
with self.assertRaises(ValueError) as context:
|
||||
percona_utils.get_nrpe_threads_connected_thresholds()
|
||||
self.assertEqual(ValueError("the warning threshold must be less "
|
||||
"than critical"),
|
||||
context.exception)
|
||||
|
||||
self.config.return_value = "80,90"
|
||||
thresholds = percona_utils.get_nrpe_threads_connected_thresholds()
|
||||
self.assertEqual(thresholds, (80, 90))
|
||||
|
||||
|
||||
class UtilsTestsStatus(CharmTestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user