Fix hostmonitor to respect quorum

Both cibadmin-based and crm_mon-based host status queryings were
affected, allowing partitioned cluster to tell Masakari to
evacuate hosts from the other partition (which nota bene include
all remotes if applicable).

Closes-Bug: #1878548
Change-Id: I0b1ca8a011ee4da162a2c3a986c1dab9a3d38190
This commit is contained in:
Radosław Piliszek 2021-09-13 19:27:52 +00:00
parent 4b99f7574c
commit c2d9a4f9cb
5 changed files with 90 additions and 13 deletions

View File

@ -372,6 +372,12 @@ class HandleHost(driver.DriverBase):
# Set to the ParseCrmMonXml object.
self.crmmon_xml_parser.set_crmmon_xml(crmmon_xml)
# Check if the cluster has quorum.
if not self.crmmon_xml_parser.has_quorum():
msg = "Pacemaker cluster doesn't have quorum."
LOG.warning("%s", msg)
return 2
# Get node_state tag list.
node_state_tag_list = self.crmmon_xml_parser.get_node_state_tag_list()
if len(node_state_tag_list) == 0:
@ -402,6 +408,7 @@ class HandleHost(driver.DriverBase):
if self.xml_parser.have_quorum() == 0:
msg = "Pacemaker cluster doesn't have quorum."
LOG.warning("%s", msg)
return 2
# Get node_state tag list.
node_state_tag_list = self.xml_parser.get_node_state_tag_list()

View File

@ -39,6 +39,28 @@ class ParseCrmMonXml(object):
# Convert xml.etree.ElementTree.Element object.
self.crmmon_tag = ElementTree.fromstring(crmmon_xml)
def has_quorum(self):
"""Answers if cluster has quorum or not.
:returns: True if cluster has quorum, False otherwise.
"""
current_dc = self._get_current_dc()
return current_dc.get('with_quorum') == 'true'
def _get_summary(self):
child_list = list(self.crmmon_tag)
for child in child_list:
if child.tag == 'summary':
return child
return None
def _get_current_dc(self):
child_list = list(self._get_summary())
for child in child_list:
if child.tag == 'current_dc':
return child
return None
def _get_nodes(self):
# status tag exists in the crmmon tag.
if self.crmmon_tag is None:

View File

@ -699,13 +699,17 @@ class TestHandleHost(testtools.TestCase):
@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
'has_quorum')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon(
self, mock_get_crmmon_xml, mock_set_crmmon_xml,
mock_get_node_state_tag_list, mock_check_if_status_changed):
mock_has_quorum, mock_get_node_state_tag_list,
mock_check_if_status_changed):
mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
mock_set_crmmon_xml.return_value = None
mock_has_quorum.return_value = True
status_tag = ElementTree.fromstring(CRMMON_NODES_TAG_XML)
node_state_tag_list = list(status_tag)
mock_get_node_state_tag_list.return_value = node_state_tag_list
@ -715,7 +719,6 @@ class TestHandleHost(testtools.TestCase):
ret = obj._check_host_status_by_crm_mon()
self.assertEqual(0, ret)
mock_get_node_state_tag_list.assert_called_once_with()
mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
mock_get_node_state_tag_list.assert_called_once_with()
mock_check_if_status_changed.assert_called_once_with(
@ -724,16 +727,38 @@ class TestHandleHost(testtools.TestCase):
{'uname': 'remote2', 'crmd': 'online'},
{'uname': 'remote3', 'crmd': 'online'}])
@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
'has_quorum')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_no_quorum(
self, mock_get_crmmon_xml, mock_set_crmmon_xml,
mock_has_quorum, mock_check_if_status_changed):
mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
mock_set_crmmon_xml.return_value = None
mock_has_quorum.return_value = False
obj = handle_host.HandleHost()
ret = obj._check_host_status_by_crm_mon()
self.assertEqual(2, ret)
mock_set_crmmon_xml.assert_called_once_with(CRMMON_NODES_TAG_XML)
mock_check_if_status_changed.assert_not_called()
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
'has_quorum')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml,
'get_node_state_tag_list')
@mock.patch.object(parse_crmmon_xml.ParseCrmMonXml, 'set_crmmon_xml')
@mock.patch.object(handle_host.HandleHost, '_get_crmmon_xml')
def test_check_host_status_by_crm_mon_not_have_node_state_tag(
self, mock_get_crmmon_xml, mock_set_crmmon_xml,
mock_get_node_state_tag_list):
mock_get_node_state_tag_list, mock_has_quorum):
mock_get_crmmon_xml.return_value = CRMMON_NODES_TAG_XML
mock_set_crmmon_xml.return_value = None
mock_get_node_state_tag_list.return_value = []
mock_has_quorum.return_value = True
obj = handle_host.HandleHost()
@ -783,31 +808,24 @@ class TestHandleHost(testtools.TestCase):
node_state_tag_list)
@mock.patch.object(handle_host.HandleHost, '_check_if_status_changed')
@mock.patch.object(parse_cib_xml.ParseCibXml, 'get_node_state_tag_list')
@mock.patch.object(parse_cib_xml.ParseCibXml, 'have_quorum')
@mock.patch.object(parse_cib_xml.ParseCibXml, 'set_cib_xml')
@mock.patch.object(handle_host.HandleHost, '_get_cib_xml')
def test_check_host_status_by_cibadmin_no_quorum(
self, mock_get_cib_xml, mock_set_cib_xml, mock_have_quorum,
mock_get_node_state_tag_list, mock_check_if_status_changed):
mock_check_if_status_changed):
mock_get_cib_xml.return_value = STATUS_TAG_XML
mock_set_cib_xml.return_value = None
mock_have_quorum.return_value = 0
status_tag = ElementTree.fromstring(STATUS_TAG_XML)
node_state_tag_list = list(status_tag)
mock_get_node_state_tag_list.return_value = node_state_tag_list
mock_check_if_status_changed.return_value = None
obj = handle_host.HandleHost()
ret = obj._check_host_status_by_cibadmin()
self.assertEqual(0, ret)
self.assertEqual(2, ret)
mock_get_cib_xml.assert_called_once_with()
mock_set_cib_xml.assert_called_once_with(STATUS_TAG_XML)
mock_have_quorum.assert_called_once_with()
mock_get_node_state_tag_list.assert_called_once_with()
mock_check_if_status_changed.assert_called_once_with(
node_state_tag_list)
mock_check_if_status_changed.assert_not_called()
@mock.patch.object(handle_host.HandleHost, '_get_cib_xml')
def test_check_host_status_by_cibadmin_cib_xml_is_None(

View File

@ -20,6 +20,10 @@ from masakarimonitors.hostmonitor.host_handler import parse_crmmon_xml
CRMMON_XML = '<?xml version="1.0"?>' \
'<crm_mon version="1.1.18">' \
' <summary>' \
' <stack type="corosync" />' \
' <current_dc present="true" with_quorum="true" />' \
' </summary>' \
' <nodes>' \
' <node name="node-1" id="1001" online="true" />' \
' <node name="node-2" id="1002" online="false" />' \
@ -27,6 +31,15 @@ CRMMON_XML = '<?xml version="1.0"?>' \
' </nodes>' \
'</crm_mon>'
CRMMON_XML_NO_QUORUM = \
'<?xml version="1.0"?>' \
'<crm_mon version="1.1.18">' \
' <summary>' \
' <stack type="corosync" />' \
' <current_dc present="true" with_quorum="false" />' \
' </summary>' \
'</crm_mon>'
CRMMON_NONODES_XML = '<?xml version="1.0"?>' \
'<crm_mon version="1.1.18">' \
' <nodes>' \
@ -47,6 +60,16 @@ class TestParseCrmMonXml(testtools.TestCase):
obj = parse_crmmon_xml.ParseCrmMonXml()
obj.set_crmmon_xml(CRMMON_XML)
def test_has_quorum(self):
obj = parse_crmmon_xml.ParseCrmMonXml()
obj.set_crmmon_xml(CRMMON_XML)
self.assertEqual(True, obj.has_quorum())
def test_has_quorum_no_quorum(self):
obj = parse_crmmon_xml.ParseCrmMonXml()
obj.set_crmmon_xml(CRMMON_XML_NO_QUORUM)
self.assertEqual(False, obj.has_quorum())
def test_get_node_state_tag_list(self):
obj = parse_crmmon_xml.ParseCrmMonXml()
obj.set_crmmon_xml(CRMMON_XML)

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fixes hostmonitor reporting hosts down because of Pacemaker cluster
partitioning.
Now hostmonitor properly respects the status of Pacemaker cluster quorum.
`LP#1878548 <https://launchpad.net/bugs/1878548>`__