Browse Source

Use crm_mon for pacemaker-remote deployments

As described in bug #1728527, cibadmin does not expose the state of
the pacemaker-remote nodes, which means hostmonitor cannot track
them. This change switches to crm_mon to check the status of
remote nodes if the new config option host.restrict_to_remotes
is set to True. This will trigger host monitor to use crm_mon
to monitor nodes and will only monitor nodes that are marked
as remotes (not members).

Change-Id: I3f2026805413504c875ea5f39eb036d44b26dd43
Depends-On: Iaa2251708616e9c69817bf5b346d795ea7a4d21b
Closes-Bug: #1728527
tags/9.0.0.0rc1
Liam Young 1 year ago
parent
commit
dc9b777724
5 changed files with 380 additions and 2 deletions
  1. +4
    -0
      masakarimonitors/conf/host.py
  2. +62
    -2
      masakarimonitors/hostmonitor/host_handler/handle_host.py
  3. +81
    -0
      masakarimonitors/hostmonitor/host_handler/parse_crmmon_xml.py
  4. +155
    -0
      masakarimonitors/tests/unit/hostmonitor/host_handler/test_handle_host.py
  5. +78
    -0
      masakarimonitors/tests/unit/hostmonitor/host_handler/test_parse_crmmon_xml.py

+ 4
- 0
masakarimonitors/conf/host.py View File

@@ -40,6 +40,10 @@ Possible values:

If ipmi RA is not set in pacemaker, this value should be set True.
'''),
cfg.BoolOpt('restrict_to_remotes',
default=False,
help='Only monitor pacemaker-remotes, ignore the status of'
' full cluster members.'),
cfg.IntOpt('ipmi_timeout',
default=5,
help='Timeout value(in seconds) of the ipmitool command.'),


+ 62
- 2
masakarimonitors/hostmonitor/host_handler/handle_host.py View File

@@ -23,6 +23,7 @@ from masakarimonitors.ha import masakari
import masakarimonitors.hostmonitor.host_handler.driver as driver
from masakarimonitors.hostmonitor.host_handler import hold_host_status
from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml
from masakarimonitors.objects import event_constants as ec
from masakarimonitors import utils

@@ -30,6 +31,18 @@ LOG = oslo_logging.getLogger(__name__)
CONF = masakarimonitors.conf.CONF


class CibSchemaCompliantTag(dict):
    """Dict view of a crm_mon node entry using cib node attribute names.

    The ``name`` value of the crm_mon entry is exposed as ``uname`` and the
    ``online`` value is translated into a ``crmd`` value of either
    ``'online'`` or ``'offline'``.
    """

    def __init__(self, crmon_entry):
        # Only the literal string 'true' counts as online; anything else
        # (including a missing attribute) is reported as offline.
        if crmon_entry.get('online') == 'true':
            crmd_state = 'online'
        else:
            crmd_state = 'offline'
        super(CibSchemaCompliantTag, self).__init__(
            uname=crmon_entry.get('name'), crmd=crmd_state)


class HandleHost(driver.DriverBase):
"""Handle hosts.

@@ -40,6 +53,7 @@ class HandleHost(driver.DriverBase):
super(HandleHost, self).__init__()
self.my_hostname = socket.gethostname()
self.xml_parser = parse_cib_xml.ParseCibXml()
self.crmmon_xml_parser = parse_crmmon_xml.ParseCrmMonXml()
self.status_holder = hold_host_status.HostHoldStatus()
self.notifier = masakari.SendNotification()

@@ -168,6 +182,22 @@ class HandleHost(driver.DriverBase):

return out

def _get_crmmon_xml(self):
    """Run ``crm_mon -X`` as root and return what it printed on stdout.

    :returns: stdout of the command, or None when the command raised or
              produced any output on stderr (a warning is logged in
              both cases).
    """
    try:
        stdout, stderr = utils.execute('crm_mon', '-X', run_as_root=True)
        if stderr:
            # Anything on stderr is treated like a command failure and is
            # reported through the same warning path as a real exception.
            raise Exception(("crmmon command output stderr: %s") % stderr)
    except Exception as exc:
        LOG.warning("Exception caught: %s", exc)
        return None
    else:
        return stdout

def _is_poweroff(self, hostname):
ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
if ipmi_values is None:
@@ -298,6 +328,31 @@ class HandleHost(driver.DriverBase):
# Update host status.
self.status_holder.set_host_status(node_state_tag)

def _check_host_status_by_crm_mon(self):
    """Check the status of remote nodes using the crm_mon XML output.

    :returns: 1 when the crm_mon XML could not be obtained, 0 after the
              remote nodes have been handed to the status-change check.
    :raises Exception: when the parser returns no node tags at all.
    """
    xml_text = self._get_crmmon_xml()
    if xml_text is None:
        # The crm_mon command failed; the caller decides what to do next.
        return 1

    parser = self.crmmon_xml_parser
    parser.set_crmmon_xml(xml_text)

    all_nodes = parser.get_node_state_tag_list()
    if len(all_nodes) == 0:
        # crm_mon output without any node entry is an unexpected result.
        raise Exception(
            "Failed to get nodes tag from crm_mon xml.")

    # Keep only the entries typed as 'remote' and convert each of them to
    # the cib-style dict consumed by the status-change check.
    remote_nodes = []
    for node in all_nodes:
        if node.get('type') == 'remote':
            remote_nodes.append(CibSchemaCompliantTag(node))

    self._check_if_status_changed(remote_nodes)

    return 0

def _check_host_status_by_cibadmin(self):
# Get xml of cib info.
cib_xml = self._get_cib_xml()
@@ -362,8 +417,13 @@ class HandleHost(driver.DriverBase):
CONF.host.monitoring_interval)
continue

# Check the host status is online or offline by cibadmin.
if self._check_host_status_by_cibadmin() != 0:
# Check the host status is online or offline.
if CONF.host.restrict_to_remotes:
status_func = self._check_host_status_by_crm_mon
else:
status_func = self._check_host_status_by_cibadmin

if status_func() != 0:
LOG.warning("hostmonitor skips monitoring hosts.")
eventlet.greenthread.sleep(CONF.host.monitoring_interval)
continue


+ 81
- 0
masakarimonitors/hostmonitor/host_handler/parse_crmmon_xml.py View File

@@ -0,0 +1,81 @@
# Copyright(c) 2019 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from xml.etree import ElementTree

from oslo_log import log as oslo_logging

LOG = oslo_logging.getLogger(__name__)


class ParseCrmMonXml(object):
    """ParseCrmMonXml class

    This class parses the crmmon xml and exposes the ``node`` elements
    found directly under the root's ``nodes`` element.
    """

    def __init__(self):
        # Root element of the parsed crmmon xml; None until
        # set_crmmon_xml() has been called.
        self.crmmon_tag = None

    def set_crmmon_xml(self, crmmon_xml):
        """Set xml.etree.ElementTree.Element object.

        This method receives string of crmmon xml, and convert it
        to xml.etree.ElementTree.Element object.

        :params crmmon_xml: String of crmmon xml
        """
        # Convert xml.etree.ElementTree.Element object.
        self.crmmon_tag = ElementTree.fromstring(crmmon_xml)

    def _get_nodes(self):
        """Return the first ``nodes`` child of the root element.

        :returns: the ``nodes`` element, or None when no xml has been set
                  or the root has no such child
        """
        if self.crmmon_tag is None:
            return None
        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9, while iteration yields the same direct children.
        for child in self.crmmon_tag:
            if child.tag == 'nodes':
                return child
        return None

    def _get_node_states(self, nodes_tag):
        """Return the ``node`` elements that are direct children.

        :params nodes_tag: the ``nodes`` element to inspect
        :returns: list of ``node`` elements in document order
        """
        # Direct iteration replaces the removed Element.getchildren().
        return [child for child in nodes_tag if child.tag == 'node']

    def get_node_state_tag_list(self):
        """Get node tag list.

        This method gets the list of node tags from crmmon xml. An error
        is logged when the nodes tag is missing or holds no node tag.

        :returns: list of node elements, empty when none could be found
        """
        # Get nodes tag.
        nodes_tag = self._get_nodes()
        if nodes_tag is None:
            LOG.error("crm_mon xml doesn't have nodes tag.")
            return []

        # Get node tag list.
        node_state_tag_list = self._get_node_states(nodes_tag)
        if len(node_state_tag_list) == 0:
            LOG.error("crm_mon xml doesn't have node tag.")

        return node_state_tag_list

+ 155
- 0
masakarimonitors/tests/unit/hostmonitor/host_handler/test_handle_host.py View File

@@ -25,6 +25,7 @@ from masakarimonitors.ha import masakari
from masakarimonitors.hostmonitor.host_handler import handle_host
from masakarimonitors.hostmonitor.host_handler import hold_host_status
from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml
from masakarimonitors.objects import event_constants as ec
from masakarimonitors import utils

@@ -48,6 +49,52 @@ STATUS_TAG_XML = ' <status>' \
' <test foo="foo"/>' \
' </node_state>' \
' </status>'
CRMMON_NODES_TAG_XML = """
<nodes>
<node name="member1" id="1002" online="true" standby="false"
standby_onfail="false" maintenance="false" pending="false"
unclean="false" shutdown="false" expected_up="true" is_dc="false"
resources_running="2" type="member" />
<node name="member2" id="1001" online="true" standby="false"
standby_onfail="false" maintenance="false" pending="false"
unclean="false" shutdown="false" expected_up="true" is_dc="true"
resources_running="1" type="member" />
<node name="remote1" id="remotehostname1" online="true" standby="false"
standby_onfail="false" maintenance="false" pending="false"
unclean="false" shutdown="false" expected_up="false"
is_dc="false" resources_running="0" type="remote" />
<node name="remote2" id="remotehostname2" online="true" standby="false"
standby_onfail="false" maintenance="false" pending="false"
unclean="false" shutdown="false" expected_up="false" is_dc="false"
resources_running="0" type="remote" />
<node name="remote3" id="remotehostname3" online="true" standby="false"
standby_onfail="false" maintenance="false" pending="false"
unclean="false" shutdown="false" expected_up="false" is_dc="false"
resources_running="0" type="remote" />
<node name="member3" id="1000" online="true" standby="false"
standby_onfail="false" maintenance="false" pending="false"
unclean="false" shutdown="false" expected_up="true" is_dc="false"
resources_running="4" type="member" />
</nodes>
"""


class TestCibSchemaCompliantTag(testtools.TestCase):
    """Check how crm_mon node entries are mapped to cib-style keys."""

    def _build_tag(self, online):
        # Both tests use the same node name and differ only in 'online'.
        entry = {'name': 'test1', 'online': online}
        return handle_host.CibSchemaCompliantTag(entry)

    def test_init_offline(self):
        result = self._build_tag('false')
        self.assertEqual(result['uname'], 'test1')
        self.assertEqual(result['crmd'], 'offline')

    def test_init_online(self):
        result = self._build_tag('true')
        self.assertEqual(result['uname'], 'test1')
        self.assertEqual(result['crmd'], 'online')


class TestHandleHost(testtools.TestCase):
@@ -309,6 +356,28 @@ class TestHandleHost(testtools.TestCase):
mock_execute.assert_called_once_with(
'cibadmin', '--query', run_as_root=True)

@mock.patch.object(utils, 'execute')
def test_get_crmmon_xml(self, mock_execute):
    """stdout of crm_mon is returned when stderr is empty."""
    expected_stdout = 'test_stdout'
    mock_execute.return_value = (expected_stdout, '')

    handler = handle_host.HandleHost()
    result = handler._get_crmmon_xml()

    self.assertEqual(expected_stdout, result)
    mock_execute.assert_called_once_with(
        'crm_mon', '-X', run_as_root=True)

@mock.patch.object(utils, 'execute')
def test_get_crmmon_xml_stderr(self, mock_execute):
    """None is returned when crm_mon wrote something to stderr."""
    command_output = ('test_stdout', 'test_stderr')
    mock_execute.return_value = command_output

    handler = handle_host.HandleHost()
    result = handler._get_crmmon_xml()

    self.assertIsNone(result)
    mock_execute.assert_called_once_with(
        'crm_mon', '-X', run_as_root=True)

@mock.patch.object(utils, 'execute')
@mock.patch.object(parse_cib_xml.ParseCibXml, 'get_stonith_ipmi_params')
def test_is_poweroff(self, mock_get_stonith_ipmi_params, mock_execute):
@@ -570,6 +639,65 @@ class TestHandleHost(testtools.TestCase):
mock_send_notification.assert_called_once_with(
CONF.host.api_retry_max, CONF.host.api_retry_interval, test_event)

@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
                   'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon(
        self, mock_get_crmmon_xml, mock_set_crmmon_xml,
        mock_get_node_state_tag_list, mock_check_if_status_changed):
    """Only nodes typed 'remote' reach the status-change check."""
    mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
    mock_set_crmmon_xml.return_value = None
    status_tag = ElementTree.fromstring(CRMMON_NODES_TAG_XML)
    # list(element) yields the direct children; Element.getchildren()
    # was removed in Python 3.9.
    node_state_tag_list = list(status_tag)
    mock_get_node_state_tag_list.return_value = node_state_tag_list
    mock_check_if_status_changed.return_value = None

    obj = handle_host.HandleHost()
    ret = obj._check_host_status_by_crm_mon()

    self.assertEqual(0, ret)
    # Each collaborator is expected to be used exactly once.
    mock_get_crmmon_xml.assert_called_once_with()
    mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
    mock_get_node_state_tag_list.assert_called_once_with()
    # The three member nodes in the fixture must have been filtered out.
    mock_check_if_status_changed.assert_called_once_with(
        [
            {'uname': 'remote1', 'crmd': 'online'},
            {'uname': 'remote2', 'crmd': 'online'},
            {'uname': 'remote3', 'crmd': 'online'}])

@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
                   'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_not_have_node_state_tag(
        self, mock_get_crmmon_xml, mock_set_crmmon_xml,
        mock_get_node_state_tag_list):
    """An empty node list from the parser makes the check raise."""
    mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
    mock_set_crmmon_xml.return_value = None
    mock_get_node_state_tag_list.return_value = []

    obj = handle_host.HandleHost()

    # assertRaisesRegexp is a deprecated alias that was removed in
    # Python 3.12; assertRaisesRegex is the supported spelling.
    self.assertRaisesRegex(
        Exception, "Failed to get nodes tag from crm_mon xml.",
        obj._check_host_status_by_crm_mon)
    mock_get_crmmon_xml.assert_called_once_with()
    mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
    mock_get_node_state_tag_list.assert_called_once_with()

@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_xml_is_None(
        self, mock_get_crmmon_xml):
    """A missing crm_mon XML makes the check return 1."""
    mock_get_crmmon_xml.return_value = None

    handler = handle_host.HandleHost()
    result = handler._check_host_status_by_crm_mon()

    mock_get_crmmon_xml.assert_called_once_with()
    self.assertEqual(1, result)

@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_cib_xml.ParseCibXml, 'get_node_state_tag_list')
@mock.patch.object(parse_cib_xml.ParseCibXml, 'have_quorum')
@@ -693,3 +821,30 @@ class TestHandleHost(testtools.TestCase):
mock_check_pacemaker_services.assert_called_with('pacemaker_remote')
self.assertEqual(2, mock_check_host_status_by_cibadmin.call_count)
self.assertEqual(2, mock_check_host_status_by_crmadmin.call_count)

# With host.restrict_to_remotes enabled, this test expects monitor_hosts()
# to check host status through _check_host_status_by_crm_mon.
@mock.patch.object(eventlet.greenthread, 'sleep')
@mock.patch.object(handle_host.HandleHost,
'_check_host_status_by_crm_mon')
@mock.patch.object(handle_host.HandleHost, '_check_pacemaker_services')
@mock.patch.object(handle_host.HandleHost, '_check_hb_line')
def test_monitor_hosts_remotes_only(self,
mock_check_hb_line,
mock_check_pacemaker_services,
mock_check_host_status_by_crm_mon,
mock_sleep):

# NOTE(review): this assignment is not reverted inside the test; confirm
# that a fixture resets CONF so the value cannot leak into other tests.
CONF.host.restrict_to_remotes = True
mock_check_hb_line.side_effect = \
[0, Exception("Test exception.")]
mock_check_pacemaker_services.return_value = True
# NOTE(review): presumably return_value = 0 was intended. side_effect is
# documented as a callable, an iterable or an exception, so calling a
# mock whose side_effect is 0 is expected to raise rather than return 0.
# That would explain why _check_hb_line is asserted below to be called
# once although two side effects are queued -- TODO confirm.
mock_check_host_status_by_crm_mon.side_effect = 0
mock_sleep.return_value = None

obj = handle_host.HandleHost()
obj.monitor_hosts()

self.assertEqual(1, mock_check_hb_line.call_count)
self.assertEqual(1, mock_check_pacemaker_services.call_count)
mock_check_pacemaker_services.assert_called_with('pacemaker_remote')
self.assertEqual(1, mock_check_host_status_by_crm_mon.call_count)
mock_check_host_status_by_crm_mon.assert_called_once_with()

+ 78
- 0
masakarimonitors/tests/unit/hostmonitor/host_handler/test_parse_crmmon_xml.py View File

@@ -0,0 +1,78 @@
# Copyright(c) 2019 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import testtools

from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml


CRMMON_XML = '<?xml version="1.0"?>' \
'<crm_mon version="1.1.18">' \
' <nodes>' \
' <node name="node-1" id="1001" online="true" />' \
' <node name="node-2" id="1002" online="false" />' \
' <node name="node-3" id="1003" online="true" />' \
' </nodes>' \
'</crm_mon>'

CRMMON_NONODES_XML = '<?xml version="1.0"?>' \
'<crm_mon version="1.1.18">' \
' <nodes>' \
' </nodes>' \
'</crm_mon>'

CRMMON_NONODES_TAG_XML = '<?xml version="1.0"?>' \
'<crm_mon version="1.1.18">' \
'</crm_mon>'


class TestParseCrmMonXml(testtools.TestCase):
    """Unit tests for parse_crmmon_xml.ParseCrmMonXml."""

    def test_set_crmmon_xml(self):
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_XML)

        # The parsed root element must be stored on the parser.
        self.assertEqual('crm_mon', obj.crmmon_tag.tag)

    def test_get_node_state_tag_list(self):
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_XML)

        node_state_tag_list = obj.get_node_state_tag_list()

        expected = {
            'node-1': 'true',
            'node-2': 'false',
            'node-3': 'true'}

        # Compare the whole mapping so that an empty or partial result
        # fails instead of passing without any assertion being run.
        actual = {
            node_state_tag.get('name'): node_state_tag.get('online')
            for node_state_tag in node_state_tag_list}
        self.assertEqual(len(expected), len(node_state_tag_list))
        self.assertEqual(expected, actual)

    def test_get_node_state_tag_list_unset(self):
        # No xml has been set on the parser at all.
        obj = parse_crmmon_xml.ParseCrmMonXml()
        self.assertEqual(obj.get_node_state_tag_list(), [])

    def test_get_node_state_tag_list_nonodes(self):
        # A nodes tag is present but contains no node tag.
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_NONODES_XML)
        self.assertEqual(obj.get_node_state_tag_list(), [])

    def test_get_node_state_tag_list_nonodes_tag(self):
        # The document has no nodes tag.
        obj = parse_crmmon_xml.ParseCrmMonXml()
        obj.set_crmmon_xml(CRMMON_NONODES_TAG_XML)
        self.assertEqual(obj.get_node_state_tag_list(), [])

Loading…
Cancel
Save