Make the workgroup status more robust
The current charm does not indicate to the end user when a specific resource is not running. Neither does it indicate when a node is offline or stopped. Validate that configured resources are actually running and let the end user know if they are not. Closes-Bug: #1834263 Change-Id: I1171e71ae3b015b4b838b7ecf0de18eb10d7c8f2
This commit is contained in:
parent
4d391e8107
commit
9364440075
|
@ -113,6 +113,37 @@ def crm_res_running(opt_name):
|
|||
return False
|
||||
|
||||
|
||||
def crm_res_running_on_node(resource, node):
    """Determine if the resource is running on the given node.

    If the resource is active/passive check if it is running on any node.
    If the resource is active/active check it is running on the given node.

    The decision is based purely on the text printed by
    ``crm resource status <resource>``; the command's exit status is ignored.

    :param resource: str name of resource
    :param node: str name of node
    :returns: boolean
    """
    (_, output) = subprocess.getstatusoutput(
        "crm resource status {}".format(resource))
    running_prefix = "resource {} is running".format(resource)
    lines = output.split("\n")

    if len(lines) > 1:
        # Multi line is a clone list like haproxy and should run on all nodes
        # check if it is running on this node
        for line in lines:
            if not line.startswith(running_prefix):
                continue
            # Compare whole words rather than substrings so that a node
            # called "node1" is not mistaken for "node10" or "node11".
            # Only the text after the "is running" prefix is inspected so the
            # resource name itself can never be taken for a node name.
            reported = line[len(running_prefix):].replace(":", " ").split()
            if node in reported:
                return True
        return False

    # Single line is for active/passive like a VIP, may not be on this node
    # but check it is running somewhere
    return output.startswith(running_prefix)
|
||||
|
||||
|
||||
def list_nodes():
|
||||
"""List member nodes."""
|
||||
cmd = ['crm', 'node', 'status']
|
||||
|
|
|
@ -1157,7 +1157,8 @@ def pause_unit():
|
|||
if has_resources:
|
||||
messages.append("Resources still running on unit")
|
||||
status, message = assess_status_helper()
|
||||
if status != 'active':
|
||||
# New status message will indicate the resource is not running
|
||||
if status != 'active' and 'not running' not in message:
|
||||
messages.append(message)
|
||||
if messages and not is_unit_upgrading_set():
|
||||
raise Exception("Couldn't pause: {}".format("; ".join(messages)))
|
||||
|
@ -1210,6 +1211,14 @@ def assess_status_helper():
|
|||
status = 'maintenance'
|
||||
message = 'Pacemaker in maintenance mode'
|
||||
|
||||
for resource in get_resources().keys():
|
||||
if not pcmk.is_resource_present(resource):
|
||||
return ("waiting",
|
||||
"Resource: {} not yet configured".format(resource))
|
||||
if not pcmk.crm_res_running_on_node(resource, get_hostname()):
|
||||
return ("blocked",
|
||||
"Resource: {} not running".format(resource))
|
||||
|
||||
return status, message
|
||||
|
||||
|
||||
|
@ -1266,3 +1275,15 @@ def maintenance_mode(enable):
|
|||
pcmk.set_property('maintenance-mode', str(enable).lower())
|
||||
else:
|
||||
log('Desired value for maintenance-mode is already set', level=DEBUG)
|
||||
|
||||
|
||||
def get_resources():
    """Get resources from the HA relation

    Walks every unit on every "ha" relation and merges the 'resources' data
    each one has published, so that a unit which has not (yet) published
    anything cannot blank out the resources reported by another unit.

    :returns: dict of resources
    """
    resources = {}
    for rid in relation_ids("ha"):
        for unit in related_units(rid):
            # NOTE(review): assumes parse_data returns a dict, or a falsy
            # value when the unit has published nothing - confirm against
            # its definition.
            unit_resources = parse_data(rid, unit, 'resources')
            if unit_resources:
                resources.update(unit_resources)
    return resources
|
||||
|
|
|
@ -107,6 +107,50 @@ class TestPcmk(unittest.TestCase):
|
|||
getstatusoutput.return_value = (1, "foobar")
|
||||
self.assertFalse(pcmk.crm_res_running('res_nova_consoleauth'))
|
||||
|
||||
@mock.patch('subprocess.getstatusoutput')
def test_crm_res_running_on_node(self, getstatusoutput):
    """Check crm_res_running_on_node against canned crm status output."""
    _resource = "res_nova_consoleauth"
    _this_node = "node1"
    _another_node = "node5"

    running_on = "resource {} is running: {}"
    not_running = "resource {} is NOT running".format(_resource)
    here = running_on.format(_resource, _this_node)
    elsewhere = running_on.format(_resource, _another_node)

    # Each entry: (stubbed getstatusoutput result, assertion to apply)
    scenarios = [
        # Not running
        ((1, "foobar"), self.assertFalse),
        # Running active/passive on some other node
        ((0, elsewhere), self.assertTrue),
        # Running active/passive on this node
        ((0, here), self.assertTrue),
        # Running on some but not this node
        ((0, "\n".join([elsewhere, not_running])), self.assertFalse),
        # Running on this node and not others
        ((0, "\n".join([here, not_running])), self.assertTrue),
        # Running on more than one and this node
        ((0, "\n".join([elsewhere, here])), self.assertTrue),
    ]

    for canned_result, check in scenarios:
        getstatusoutput.return_value = canned_result
        check(pcmk.crm_res_running_on_node(_resource, _this_node))
|
||||
|
||||
@mock.patch('socket.gethostname')
|
||||
@mock.patch('subprocess.getstatusoutput')
|
||||
def test_wait_for_pcmk(self, getstatusoutput, gethostname):
|
||||
|
|
Loading…
Reference in New Issue