class CibSchemaCompliantTag(dict):
    """Dict exposing a crm_mon node entry under cib node_state keys.

    The resulting mapping has exactly two keys:

    * ``uname``: the value of the entry's ``name`` attribute (None if the
      attribute is missing).
    * ``crmd``: ``'online'`` when the entry's ``online`` attribute is the
      string ``'true'``, otherwise ``'offline'``.

    :param crmon_entry: any object offering ``get(key)``, e.g. a ``node``
                        element parsed from crm_mon XML or a plain dict.
    """

    def __init__(self, crmon_entry):
        # crm_mon reports the flag as the literal string 'true'; anything
        # else (including a missing attribute) is treated as offline.
        is_online = crmon_entry.get('online') == 'true'
        # Populate through the dict initializer so the parent class is
        # properly initialised instead of being bypassed.
        super(CibSchemaCompliantTag, self).__init__(
            uname=crmon_entry.get('name'),
            crmd='online' if is_online else 'offline')
def _get_crmmon_xml(self):
    """Return the cluster state printed by ``crm_mon -X``.

    :returns: stdout of the crm_mon command, or None when executing the
              command raised or it wrote anything to stderr (a warning
              is logged in either case).
    """
    try:
        stdout, stderr = utils.execute('crm_mon', '-X', run_as_root=True)
        if stderr:
            # Any stderr output is treated as a failure and routed
            # through the same warning path as an execution error.
            raise Exception(("crmmon command output stderr: %s") % stderr)
    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        return None

    return stdout


def _check_host_status_by_crm_mon(self):
    """Check the status of pacemaker-remote nodes from crm_mon output.

    :returns: 0 once the remote nodes were handed to
              _check_if_status_changed, 1 if the crm_mon XML could not
              be obtained.
    :raises Exception: if the parser yields no node tags at all.
    """
    xml_text = self._get_crmmon_xml()
    if xml_text is None:
        # crm_mon command failure.
        return 1

    # Feed the XML to the ParseCrmMonXml object and read the nodes back.
    parser = self.crmmon_xml_parser
    parser.set_crmmon_xml(xml_text)
    all_nodes = parser.get_node_state_tag_list()
    if not all_nodes:
        # crm_mon XML without any node entry is an unexpected result.
        raise Exception(
            "Failed to get nodes tag from crm_mon xml.")

    # Keep only entries whose type attribute is 'remote', converted to
    # dicts carrying cib-style keys.
    remote_tags = []
    for node in all_nodes:
        if node.get('type') == 'remote':
            remote_tags.append(CibSchemaCompliantTag(node))

    # Check if status changed.
    self._check_if_status_changed(remote_tags)

    return 0
class ParseCrmMonXml(object):
    """Parse the XML document produced by crm_mon.

    The document is stored with set_crmmon_xml() and queried with
    get_node_state_tag_list(), which returns the ``node`` elements found
    directly below the root's ``nodes`` element.
    """

    def __init__(self):
        # Root element of the last parsed document; None until
        # set_crmmon_xml() has been called.
        self.crmmon_tag = None

    def set_crmmon_xml(self, crmmon_xml):
        """Parse and store the crm_mon xml.

        This method receives a string of crm_mon xml and converts it to
        an xml.etree.ElementTree.Element object.

        :param crmmon_xml: String of crmmon xml
        :raises xml.etree.ElementTree.ParseError: if the string is not
            well-formed XML.
        """
        self.crmmon_tag = ElementTree.fromstring(crmmon_xml)

    def _get_nodes(self):
        """Return the first ``nodes`` child of the root, or None."""
        if self.crmmon_tag is None:
            return None
        # find() only inspects direct children and returns None when no
        # child matches. Element.getchildren() must not be used here: it
        # was removed in Python 3.9.
        return self.crmmon_tag.find('nodes')

    def _get_node_states(self, nodes_tag):
        """Return the ``node`` elements directly below ``nodes_tag``.

        :param nodes_tag: the ``nodes`` element
        :returns: list of ``node`` elements in document order
        """
        return nodes_tag.findall('node')

    def get_node_state_tag_list(self):
        """Get the list of node tags.

        This method gets the ``node`` tags from the crm_mon xml.

        :returns: list of ``node`` elements; an empty list (after
                  logging an error) when no document is set, the
                  ``nodes`` tag is missing, or it has no ``node`` child.
        """
        nodes_tag = self._get_nodes()
        # Compare against None explicitly: an Element without children
        # is falsy, so a truthiness test would misreport an empty
        # ``nodes`` tag as missing.
        if nodes_tag is None:
            LOG.error("crm_mon xml doesn't have nodes tag.")
            return []

        node_state_tag_list = self._get_node_states(nodes_tag)
        if not node_state_tag_list:
            LOG.error("crm_mon xml doesn't have node tag.")

        return node_state_tag_list
@mock.patch.object(utils, 'execute')
def test_get_crmmon_xml_stderr(self, mock_execute):
    """_get_crmmon_xml returns None when crm_mon writes to stderr."""
    mock_execute.return_value = ('test_stdout', 'test_stderr')

    obj = handle_host.HandleHost()
    ret = obj._get_crmmon_xml()

    self.assertIsNone(ret)
    mock_execute.assert_called_once_with(
        'crm_mon', '-X', run_as_root=True)


@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
                   'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon(
        self, mock_get_crmmon_xml, mock_set_crmmon_xml,
        mock_get_node_state_tag_list, mock_check_if_status_changed):
    """Remote nodes are converted and passed on; the call returns 0."""
    mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
    mock_set_crmmon_xml.return_value = None
    status_tag = ElementTree.fromstring(CRMMON_NODES_TAG_XML)
    # list(element) yields the direct children; Element.getchildren()
    # was removed in Python 3.9.
    node_state_tag_list = list(status_tag)
    mock_get_node_state_tag_list.return_value = node_state_tag_list
    mock_check_if_status_changed.return_value = None

    obj = handle_host.HandleHost()
    ret = obj._check_host_status_by_crm_mon()

    self.assertEqual(0, ret)
    # Each collaborator is asserted exactly once (the crm_mon fetch was
    # previously left unchecked while the parser call was checked twice).
    mock_get_crmmon_xml.assert_called_once_with()
    mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
    mock_get_node_state_tag_list.assert_called_once_with()
    mock_check_if_status_changed.assert_called_once_with(
        [
            {'uname': 'remote1', 'crmd': 'online'},
            {'uname': 'remote2', 'crmd': 'online'},
            {'uname': 'remote3', 'crmd': 'online'}])


@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
                   'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_not_have_node_state_tag(
        self, mock_get_crmmon_xml, mock_set_crmmon_xml,
        mock_get_node_state_tag_list):
    """An empty node list from the parser raises an Exception."""
    mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
    mock_set_crmmon_xml.return_value = None
    mock_get_node_state_tag_list.return_value = []

    obj = handle_host.HandleHost()

    # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
    # assertRaisesRegex; kept because the supported Python versions are
    # not visible from here.
    self.assertRaisesRegexp(
        Exception, "Failed to get nodes tag from crm_mon xml.",
        obj._check_host_status_by_crm_mon)
    mock_get_crmmon_xml.assert_called_once_with()
    mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
    mock_get_node_state_tag_list.assert_called_once_with()


@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_xml_is_None(
        self, mock_get_crmmon_xml):
    """A failed crm_mon fetch (None) makes the check return 1."""
    mock_get_crmmon_xml.return_value = None

    obj = handle_host.HandleHost()
    ret = obj._check_host_status_by_crm_mon()

    self.assertEqual(1, ret)
    mock_get_crmmon_xml.assert_called_once_with()


@mock.patch.object(eventlet.greenthread, 'sleep')
@mock.patch.object(handle_host.HandleHost,
                   '_check_host_status_by_crm_mon')
@mock.patch.object(handle_host.HandleHost, '_check_pacemaker_services')
@mock.patch.object(handle_host.HandleHost, '_check_hb_line')
def test_monitor_hosts_remotes_only(self,
                                    mock_check_hb_line,
                                    mock_check_pacemaker_services,
                                    mock_check_host_status_by_crm_mon,
                                    mock_sleep):
    """With restrict_to_remotes set, monitoring uses the crm_mon check."""
    # NOTE(review): this assigns on the global CONF and is never undone,
    # so it presumably leaks into later tests -- confirm and restore it
    # in a cleanup if so.
    CONF.host.restrict_to_remotes = True
    mock_check_hb_line.side_effect = \
        [0, Exception("Test exception.")]
    mock_check_pacemaker_services.return_value = True
    # NOTE(review): side_effect is given a plain int here. A side_effect
    # that is neither callable, iterable nor an exception appears to
    # make the mock raise when called; return_value = 0 may have been
    # intended. Left unchanged because the call counts asserted below
    # depend on the current behaviour.
    mock_check_host_status_by_crm_mon.side_effect = 0
    mock_sleep.return_value = None

    obj = handle_host.HandleHost()
    obj.monitor_hosts()

    self.assertEqual(1, mock_check_hb_line.call_count)
    self.assertEqual(1, mock_check_pacemaker_services.call_count)
    mock_check_pacemaker_services.assert_called_with('pacemaker_remote')
    self.assertEqual(1, mock_check_host_status_by_crm_mon.call_count)
    mock_check_host_status_by_crm_mon.assert_called_once_with()
class TestParseCrmMonXml(testtools.TestCase):
    """Unit tests for parse_crmmon_xml.ParseCrmMonXml.

    The tests rely on the module-level fixtures CRMMON_XML,
    CRMMON_NONODES_XML and CRMMON_NONODES_TAG_XML.
    """

    def setUp(self):
        super(TestParseCrmMonXml, self).setUp()

    def test_set_crmmon_xml(self):
        """set_crmmon_xml stores the parsed document on the parser."""
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_XML)

        # Without an assertion this test could only fail on a parse error.
        self.assertIsNotNone(obj.crmmon_tag)

    def test_get_node_state_tag_list(self):
        """Every returned node carries the expected online attribute."""
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_XML)

        node_state_tag_list = obj.get_node_state_tag_list()

        expected = {
            'node-1': 'true',
            'node-2': 'false',
            'node-3': 'true'}

        # Guard against the loop below passing vacuously on an empty
        # result (assumes CRMMON_XML contains node entries, as the
        # expectations above imply).
        self.assertNotEqual([], node_state_tag_list)
        for node_state_tag in node_state_tag_list:
            self.assertEqual(
                expected[node_state_tag.get('name')],
                node_state_tag.get('online'))

    def test_get_node_state_tag_list_unset(self):
        """No document set yields an empty list."""
        obj = parse_crmmon_xml.ParseCrmMonXml()
        self.assertEqual(obj.get_node_state_tag_list(), [])

    def test_get_node_state_tag_list_nonodes(self):
        """A document parsed from CRMMON_NONODES_XML yields an empty list."""
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_NONODES_XML)
        self.assertEqual(obj.get_node_state_tag_list(), [])

    def test_get_node_state_tag_list_nonodes_tag(self):
        """A document parsed from CRMMON_NONODES_TAG_XML yields an empty list."""
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_NONODES_TAG_XML)
        self.assertEqual(obj.get_node_state_tag_list(), [])