Use crm_mon for pacemaker-remote deployments
As described in bug #1728527, cibadmin does not expose the state of pacemaker-remote nodes, which means hostmonitor cannot track them. This change switches to crm_mon to check the status of remote nodes when the new config option host.restrict_to_remotes is set to True. With that option enabled, hostmonitor uses crm_mon to monitor nodes and only monitors nodes that are marked as remotes (not members). Change-Id: I3f2026805413504c875ea5f39eb036d44b26dd43 Depends-On: Iaa2251708616e9c69817bf5b346d795ea7a4d21b Closes-Bug: #1728527
This commit is contained in:
parent
ae3ab24f9a
commit
dc9b777724
@ -40,6 +40,10 @@ Possible values:
|
||||
|
||||
If ipmi RA is not set in pacemaker, this value should be set True.
|
||||
'''),
|
||||
cfg.BoolOpt('restrict_to_remotes',
|
||||
default=False,
|
||||
help='Only monitor pacemaker-remotes, ignore the status of'
|
||||
' full cluster members.'),
|
||||
cfg.IntOpt('ipmi_timeout',
|
||||
default=5,
|
||||
help='Timeout value(in seconds) of the ipmitool command.'),
|
||||
|
@ -23,6 +23,7 @@ from masakarimonitors.ha import masakari
|
||||
import masakarimonitors.hostmonitor.host_handler.driver as driver
|
||||
from masakarimonitors.hostmonitor.host_handler import hold_host_status
|
||||
from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
|
||||
from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml
|
||||
from masakarimonitors.objects import event_constants as ec
|
||||
from masakarimonitors import utils
|
||||
|
||||
@ -30,6 +31,18 @@ LOG = oslo_logging.getLogger(__name__)
|
||||
CONF = masakarimonitors.conf.CONF
|
||||
|
||||
|
||||
class CibSchemaCompliantTag(dict):
    """Create a dict which has the same attributes as a cib node tag.

    Given a crm_mon node entry, expose its data under the attribute names
    used by a cib node_state tag:

    * ``uname`` - the entry's ``name`` value.
    * ``crmd``  - ``'online'`` when the entry's ``online`` value is the
      string ``'true'``, otherwise ``'offline'``.
    """

    def __init__(self, crmon_entry):
        """Build the mapping from one crm_mon node entry.

        :param crmon_entry: object providing ``get(key)`` for the ``name``
                            and ``online`` attributes (for example an
                            ElementTree element or a plain dict).
        """
        uname = crmon_entry.get('name')
        # Anything other than the literal string 'true' (including a
        # missing attribute, for which get() returns None) is offline.
        crmd = 'online' if crmon_entry.get('online') == 'true' else 'offline'
        # Initialise through dict.__init__ so the base class is set up
        # properly instead of being bypassed.
        super(CibSchemaCompliantTag, self).__init__(uname=uname, crmd=crmd)
|
||||
|
||||
|
||||
class HandleHost(driver.DriverBase):
|
||||
"""Handle hosts.
|
||||
|
||||
@ -40,6 +53,7 @@ class HandleHost(driver.DriverBase):
|
||||
super(HandleHost, self).__init__()
|
||||
self.my_hostname = socket.gethostname()
|
||||
self.xml_parser = parse_cib_xml.ParseCibXml()
|
||||
self.crmmon_xml_parser = parse_crmmon_xml.ParseCrmMonXml()
|
||||
self.status_holder = hold_host_status.HostHoldStatus()
|
||||
self.notifier = masakari.SendNotification()
|
||||
|
||||
@ -168,6 +182,22 @@ class HandleHost(driver.DriverBase):
|
||||
|
||||
return out
|
||||
|
||||
def _get_crmmon_xml(self):
    """Run ``crm_mon -X`` and return what it printed on stdout.

    The command is executed through ``utils.execute`` with
    ``run_as_root=True``.

    :returns: the command's stdout, or None when the command raised or
              wrote anything to stderr (a warning is logged in both
              cases).
    """
    try:
        stdout, stderr = utils.execute('crm_mon', '-X', run_as_root=True)

        # Any stderr output is treated as a failure and funnelled through
        # the same handler as an execution error.
        if stderr:
            raise Exception(("crmmon command output stderr: %s") % stderr)

    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        return None

    return stdout
|
||||
|
||||
def _is_poweroff(self, hostname):
|
||||
ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
|
||||
if ipmi_values is None:
|
||||
@ -298,6 +328,31 @@ class HandleHost(driver.DriverBase):
|
||||
# Update host status.
|
||||
self.status_holder.set_host_status(node_state_tag)
|
||||
|
||||
def _check_host_status_by_crm_mon(self):
    """Check the status of pacemaker-remote nodes from crm_mon output.

    Fetches the crm_mon XML, parses it, keeps only the node entries whose
    ``type`` attribute is ``remote``, converts them to
    CibSchemaCompliantTag and passes them to _check_if_status_changed.

    :returns: 1 when the crm_mon XML could not be obtained, 0 otherwise.
    :raises Exception: when the parsed XML yields no node entries at all.
    """
    xml_text = self._get_crmmon_xml()
    if xml_text is None:
        # crm_mon command failure.
        return 1

    # Hand the raw XML to the ParseCrmMonXml object.
    parser = self.crmmon_xml_parser
    parser.set_crmmon_xml(xml_text)

    all_node_tags = parser.get_node_state_tag_list()
    if len(all_node_tags) == 0:
        # A crm_mon report without any node entry is an unexpected
        # result.
        raise Exception("Failed to get nodes tag from crm_mon xml.")

    # Full cluster members are ignored; only remotes are tracked.
    remote_tags = []
    for node_tag in all_node_tags:
        if node_tag.get('type') == 'remote':
            remote_tags.append(CibSchemaCompliantTag(node_tag))

    # Check if status changed.
    self._check_if_status_changed(remote_tags)

    return 0
|
||||
|
||||
def _check_host_status_by_cibadmin(self):
|
||||
# Get xml of cib info.
|
||||
cib_xml = self._get_cib_xml()
|
||||
@ -362,8 +417,13 @@ class HandleHost(driver.DriverBase):
|
||||
CONF.host.monitoring_interval)
|
||||
continue
|
||||
|
||||
# Check the host status is online or offline by cibadmin.
|
||||
if self._check_host_status_by_cibadmin() != 0:
|
||||
# Check the host status is online or offline.
|
||||
if CONF.host.restrict_to_remotes:
|
||||
status_func = self._check_host_status_by_crm_mon
|
||||
else:
|
||||
status_func = self._check_host_status_by_cibadmin
|
||||
|
||||
if status_func() != 0:
|
||||
LOG.warning("hostmonitor skips monitoring hosts.")
|
||||
eventlet.greenthread.sleep(CONF.host.monitoring_interval)
|
||||
continue
|
||||
|
@ -0,0 +1,81 @@
|
||||
# Copyright(c) 2019 Canonical Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from oslo_log import log as oslo_logging
|
||||
|
||||
LOG = oslo_logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ParseCrmMonXml(object):
    """ParseCrmMonXml class

    This class parses the crm_mon xml and exposes the ``node`` elements
    found under the document's ``nodes`` element.
    """

    def __init__(self):
        # Root element of the most recently parsed document; None until
        # set_crmmon_xml() has been called.
        self.crmmon_tag = None

    def set_crmmon_xml(self, crmmon_xml):
        """Set xml.etree.ElementTree.Element object.

        This method receives string of crmmon xml, and convert it
        to xml.etree.ElementTree.Element object.

        :params crmmon_xml: String of crmmon xml
        """
        self.crmmon_tag = ElementTree.fromstring(crmmon_xml)

    def _get_nodes(self):
        """Return the first ``nodes`` child of the root element, or None.

        None is returned both when no xml has been set yet and when the
        root element has no ``nodes`` child.
        """
        if self.crmmon_tag is None:
            return None
        # Element.getchildren() was removed in Python 3.9. find() with a
        # plain tag name inspects direct children only and yields the
        # first match (or None), which is what the manual scan did.
        return self.crmmon_tag.find('nodes')

    def _get_node_states(self, nodes_tag):
        """Return the ``node`` elements directly under ``nodes_tag``.

        :param nodes_tag: the ``nodes`` element
        :returns: list of ``node`` elements in document order
        """
        # findall() with a plain tag name only matches direct children.
        return nodes_tag.findall('node')

    def get_node_state_tag_list(self):
        """Get node_state tag list.

        This method gets the list of ``node`` elements from crmmon xml.
        An error is logged when the ``nodes`` element is missing (or no
        xml has been set) and when it contains no ``node`` element.

        :returns: list of ``node`` elements; empty list when none found
        """
        # Get nodes tag.
        nodes_tag = self._get_nodes()
        if nodes_tag is None:
            LOG.error("crm_mon xml doesn't have nodes tag.")
            return []

        # Get node tag list.
        node_state_tag_list = self._get_node_states(nodes_tag)
        if len(node_state_tag_list) == 0:
            # Logged when the nodes element holds no node element.
            LOG.error("crm_mon xml doesn't have online tag.")

        return node_state_tag_list
|
@ -25,6 +25,7 @@ from masakarimonitors.ha import masakari
|
||||
from masakarimonitors.hostmonitor.host_handler import handle_host
|
||||
from masakarimonitors.hostmonitor.host_handler import hold_host_status
|
||||
from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
|
||||
from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml
|
||||
from masakarimonitors.objects import event_constants as ec
|
||||
from masakarimonitors import utils
|
||||
|
||||
@ -48,6 +49,52 @@ STATUS_TAG_XML = ' <status>' \
|
||||
' <test foo="foo"/>' \
|
||||
' </node_state>' \
|
||||
' </status>'
|
||||
CRMMON_NODES_TAG_XML = """
|
||||
<nodes>
|
||||
<node name="member1" id="1002" online="true" standby="false"
|
||||
standby_onfail="false" maintenance="false" pending="false"
|
||||
unclean="false" shutdown="false" expected_up="true" is_dc="false"
|
||||
resources_running="2" type="member" />
|
||||
<node name="member2" id="1001" online="true" standby="false"
|
||||
standby_onfail="false" maintenance="false" pending="false"
|
||||
unclean="false" shutdown="false" expected_up="true" is_dc="true"
|
||||
resources_running="1" type="member" />
|
||||
<node name="remote1" id="remotehostname1" online="true" standby="false"
|
||||
standby_onfail="false" maintenance="false" pending="false"
|
||||
unclean="false" shutdown="false" expected_up="false"
|
||||
is_dc="false" resources_running="0" type="remote" />
|
||||
<node name="remote2" id="remotehostname2" online="true" standby="false"
|
||||
standby_onfail="false" maintenance="false" pending="false"
|
||||
unclean="false" shutdown="false" expected_up="false" is_dc="false"
|
||||
resources_running="0" type="remote" />
|
||||
<node name="remote3" id="remotehostname3" online="true" standby="false"
|
||||
standby_onfail="false" maintenance="false" pending="false"
|
||||
unclean="false" shutdown="false" expected_up="false" is_dc="false"
|
||||
resources_running="0" type="remote" />
|
||||
<node name="member3" id="1000" online="true" standby="false"
|
||||
standby_onfail="false" maintenance="false" pending="false"
|
||||
unclean="false" shutdown="false" expected_up="true" is_dc="false"
|
||||
resources_running="4" type="member" />
|
||||
</nodes>
|
||||
"""
|
||||
|
||||
|
||||
class TestCibSchemaCompliantTag(testtools.TestCase):
    # Unit tests for handle_host.CibSchemaCompliantTag.

    def setUp(self):
        super(TestCibSchemaCompliantTag, self).setUp()

    def test_init_offline(self):
        # An entry whose 'online' value is 'false' maps to crmd 'offline'.
        crmon_entry = {'name': 'test1', 'online': 'false'}
        converted = handle_host.CibSchemaCompliantTag(crmon_entry)

        self.assertEqual(converted['uname'], 'test1')
        self.assertEqual(converted['crmd'], 'offline')

    def test_init_online(self):
        # An entry whose 'online' value is 'true' maps to crmd 'online'.
        crmon_entry = {'name': 'test1', 'online': 'true'}
        converted = handle_host.CibSchemaCompliantTag(crmon_entry)

        self.assertEqual(converted['uname'], 'test1')
        self.assertEqual(converted['crmd'], 'online')
|
||||
|
||||
|
||||
class TestHandleHost(testtools.TestCase):
|
||||
@ -309,6 +356,28 @@ class TestHandleHost(testtools.TestCase):
|
||||
mock_execute.assert_called_once_with(
|
||||
'cibadmin', '--query', run_as_root=True)
|
||||
|
||||
@mock.patch.object(utils, 'execute')
def test_get_crmmon_xml(self, mock_execute):
    # With an empty stderr the command's stdout is returned unchanged.
    command_result = ('test_stdout', '')
    mock_execute.return_value = command_result

    handler = handle_host.HandleHost()
    crmmon_xml = handler._get_crmmon_xml()

    self.assertEqual('test_stdout', crmmon_xml)
    mock_execute.assert_called_once_with(
        'crm_mon', '-X', run_as_root=True)
|
||||
|
||||
@mock.patch.object(utils, 'execute')
def test_get_crmmon_xml_stderr(self, mock_execute):
    # Any stderr output makes _get_crmmon_xml give up and return None.
    command_result = ('test_stdout', 'test_stderr')
    mock_execute.return_value = command_result

    handler = handle_host.HandleHost()
    crmmon_xml = handler._get_crmmon_xml()

    self.assertIsNone(crmmon_xml)
    mock_execute.assert_called_once_with(
        'crm_mon', '-X', run_as_root=True)
|
||||
|
||||
@mock.patch.object(utils, 'execute')
|
||||
@mock.patch.object(parse_cib_xml.ParseCibXml, 'get_stonith_ipmi_params')
|
||||
def test_is_poweroff(self, mock_get_stonith_ipmi_params, mock_execute):
|
||||
@ -570,6 +639,65 @@ class TestHandleHost(testtools.TestCase):
|
||||
mock_send_notification.assert_called_once_with(
|
||||
CONF.host.api_retry_max, CONF.host.api_retry_interval, test_event)
|
||||
|
||||
@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
                   'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon(
        self, mock_get_crmmon_xml, mock_set_crmmon_xml,
        mock_get_node_state_tag_list, mock_check_if_status_changed):
    # Only the nodes of type "remote" from the fixture must be passed on
    # to _check_if_status_changed, converted to cib-style dicts.
    mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
    mock_set_crmmon_xml.return_value = None
    status_tag = ElementTree.fromstring(CRMMON_NODES_TAG_XML)
    # Element.getchildren() was removed in Python 3.9; iterating the
    # element yields its direct children.
    node_state_tag_list = list(status_tag)
    mock_get_node_state_tag_list.return_value = node_state_tag_list
    mock_check_if_status_changed.return_value = None

    obj = handle_host.HandleHost()
    ret = obj._check_host_status_by_crm_mon()

    self.assertEqual(0, ret)
    # Each collaborator is expected to be used exactly once.
    mock_get_crmmon_xml.assert_called_once_with()
    mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
    mock_get_node_state_tag_list.assert_called_once_with()
    mock_check_if_status_changed.assert_called_once_with(
        [
            {'uname': 'remote1', 'crmd': 'online'},
            {'uname': 'remote2', 'crmd': 'online'},
            {'uname': 'remote3', 'crmd': 'online'}])
|
||||
|
||||
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
                   'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_not_have_node_state_tag(
        self, mock_get_crmmon_xml, mock_set_crmmon_xml,
        mock_get_node_state_tag_list):
    # An empty node list from the parser must make the check raise.
    mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
    mock_set_crmmon_xml.return_value = None
    mock_get_node_state_tag_list.return_value = []

    obj = handle_host.HandleHost()

    # assertRaisesRegexp is a deprecated alias that no longer exists in
    # Python 3.12's unittest; testtools' assertRaises returns the caught
    # exception, so its message can be checked exactly instead.
    exc = self.assertRaises(Exception, obj._check_host_status_by_crm_mon)
    self.assertEqual(
        "Failed to get nodes tag from crm_mon xml.", str(exc))
    mock_get_crmmon_xml.assert_called_once_with()
    mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
    mock_get_node_state_tag_list.assert_called_once_with()
|
||||
|
||||
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_xml_is_None(
        self, mock_get_crmmon_xml):
    # When no crm_mon XML is available the check reports failure (1).
    mock_get_crmmon_xml.return_value = None

    handler = handle_host.HandleHost()
    status = handler._check_host_status_by_crm_mon()

    self.assertEqual(1, status)
    mock_get_crmmon_xml.assert_called_once_with()
|
||||
|
||||
@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
|
||||
@mock.patch.object(parse_cib_xml.ParseCibXml, 'get_node_state_tag_list')
|
||||
@mock.patch.object(parse_cib_xml.ParseCibXml, 'have_quorum')
|
||||
@ -693,3 +821,30 @@ class TestHandleHost(testtools.TestCase):
|
||||
mock_check_pacemaker_services.assert_called_with('pacemaker_remote')
|
||||
self.assertEqual(2, mock_check_host_status_by_cibadmin.call_count)
|
||||
self.assertEqual(2, mock_check_host_status_by_crmadmin.call_count)
|
||||
|
||||
@mock.patch.object(eventlet.greenthread, 'sleep')
@mock.patch.object(handle_host.HandleHost,
                   '_check_host_status_by_crm_mon')
@mock.patch.object(handle_host.HandleHost, '_check_pacemaker_services')
@mock.patch.object(handle_host.HandleHost, '_check_hb_line')
def test_monitor_hosts_remotes_only(self,
                                    mock_check_hb_line,
                                    mock_check_pacemaker_services,
                                    mock_check_host_status_by_crm_mon,
                                    mock_sleep):
    # With host.restrict_to_remotes enabled, monitor_hosts must check the
    # host status through crm_mon.

    # Use oslo.config's override API and undo it on cleanup so the
    # changed option cannot leak into other tests.
    CONF.set_override('restrict_to_remotes', True, group='host')
    self.addCleanup(
        CONF.clear_override, 'restrict_to_remotes', group='host')

    mock_check_hb_line.side_effect = \
        [0, Exception("Test exception.")]
    mock_check_pacemaker_services.return_value = True
    # NOTE(review): 0 is neither callable, an exception nor an iterable,
    # so it is not a usable side_effect; presumably ``return_value = 0``
    # was intended. Changing it would likely alter the call counts
    # asserted below - confirm against monitor_hosts before fixing.
    mock_check_host_status_by_crm_mon.side_effect = 0
    mock_sleep.return_value = None

    obj = handle_host.HandleHost()
    obj.monitor_hosts()

    self.assertEqual(1, mock_check_hb_line.call_count)
    self.assertEqual(1, mock_check_pacemaker_services.call_count)
    mock_check_pacemaker_services.assert_called_with('pacemaker_remote')
    self.assertEqual(1, mock_check_host_status_by_crm_mon.call_count)
    mock_check_host_status_by_crm_mon.assert_called_once_with()
|
||||
|
@ -0,0 +1,78 @@
|
||||
# Copyright(c) 2019 Canonical Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import testtools
|
||||
|
||||
from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml
|
||||
|
||||
|
||||
CRMMON_XML = '<?xml version="1.0"?>' \
|
||||
'<crm_mon version="1.1.18">' \
|
||||
' <nodes>' \
|
||||
' <node name="node-1" id="1001" online="true" />' \
|
||||
' <node name="node-2" id="1002" online="false" />' \
|
||||
' <node name="node-3" id="1003" online="true" />' \
|
||||
' </nodes>' \
|
||||
'</crm_mon>'
|
||||
|
||||
CRMMON_NONODES_XML = '<?xml version="1.0"?>' \
|
||||
'<crm_mon version="1.1.18">' \
|
||||
' <nodes>' \
|
||||
' </nodes>' \
|
||||
'</crm_mon>'
|
||||
|
||||
CRMMON_NONODES_TAG_XML = '<?xml version="1.0"?>' \
|
||||
'<crm_mon version="1.1.18">' \
|
||||
'</crm_mon>'
|
||||
|
||||
|
||||
class TestParseCrmMonXml(testtools.TestCase):
    # Unit tests for parse_crmmon_xml.ParseCrmMonXml.

    def setUp(self):
        super(TestParseCrmMonXml, self).setUp()

    def test_set_crmmon_xml(self):
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_XML)

        # The parsed document root must be stored on the parser; without
        # this check the test could not fail on a broken setter.
        self.assertEqual('crm_mon', obj.crmmon_tag.tag)

    def test_get_node_state_tag_list(self):
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_XML)

        node_state_tag_list = obj.get_node_state_tag_list()

        expected = {
            'node-1': 'true',
            'node-2': 'false',
            'node-3': 'true'}

        # Compare the whole mapping so that an empty or partial result
        # fails instead of passing vacuously.
        actual = {}
        for node_state_tag in node_state_tag_list:
            actual[node_state_tag.get('name')] = node_state_tag.get('online')
        self.assertEqual(expected, actual)
        self.assertEqual(len(expected), len(node_state_tag_list))

    def test_get_node_state_tag_list_unset(self):
        # No xml has been set on the parser.
        obj = parse_crmmon_xml.ParseCrmMonXml()
        self.assertEqual(obj.get_node_state_tag_list(), [])

    def test_get_node_state_tag_list_nonodes(self):
        # The nodes element exists but contains no node element.
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_NONODES_XML)
        self.assertEqual(obj.get_node_state_tag_list(), [])

    def test_get_node_state_tag_list_nonodes_tag(self):
        # The document has no nodes element at all.
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_NONODES_TAG_XML)
        self.assertEqual(obj.get_node_state_tag_list(), [])
|
Loading…
x
Reference in New Issue
Block a user