Rework health check code
* Modified health check for node poll status mode to treat a node as
  healthy if it encounters an error getting server status.
* Simplified NodePollStatusHealthCheck code
* Added do_healthcheck method separate from do_check to clearly show
  health check behaviour
* Simplified NodePollUrlHealthCheck code by using tenacity
* Added more log statements

Change-Id: I76f0ef95067c81f123bf548c723e93d4cf9c2d49
Closes-Bug: 1800038
parent 111ea8eabd
commit 52d8702274
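Note for reviewers: the NodePollUrlHealthCheck rework below replaces a
hand-rolled retry loop with tenacity. A minimal sketch of the retry pattern
adopted here (the probe function and the numeric values are illustrative,
not taken from this patch):

    import tenacity

    def _return_last_value(retry_state):
        # Hand back the final attempt's result instead of raising
        # RetryError once the stop condition is reached.
        return retry_state.outcome.result()

    @tenacity.retry(
        retry=tenacity.retry_if_result(lambda healthy: healthy is False),
        wait=tenacity.wait_fixed(2),              # seconds between attempts
        stop=tenacity.stop_after_attempt(3),      # give up after three probes
        retry_error_callback=_return_last_value,  # return last result, not an error
    )
    def probe():
        # Stand-in for _poll_url(): returns True when the node looks healthy.
        return False

    print(probe())  # prints False after three attempts, without raising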
senlin/common/consts.py
@@ -317,6 +317,16 @@ LIFECYCLE_TRANSITION_TYPE = (

 VM_STATUS = (
     VS_ACTIVE, VS_ERROR, VS_SUSPENDED, VS_SHUTOFF, VS_PAUSED, VS_RESCUE,
+    VS_DELETED,
 ) = (
-    'ACTIVE', 'ERROR', 'SUSPENDED', 'SHUTOFF', 'PAUSED', 'RESCUE',
+    'ACTIVE', 'ERROR', 'SUSPENDED', 'SHUTOFF', 'PAUSED', 'RESCUE', 'DELETED',
 )
+
+HEALTH_CHECK_MESSAGE = (
+    POLL_STATUS_PASS, POLL_STATUS_FAIL, POLL_URL_PASS, POLL_URL_FAIL,
+) = (
+    'Poll Status health check passed',
+    'Poll Status health check failed',
+    'Poll URL health check passed',
+    'Poll URL health check failed',
+)

senlin/engine/health_manager.py
@@ -26,6 +26,7 @@ from oslo_service import service
 from oslo_service import threadgroup
 from oslo_utils import timeutils
 import re
+import tenacity
 import time

 from senlin.common import consts
@@ -241,6 +242,32 @@ class HealthCheckType(object):
         """
         pass

+    def _node_within_grace_period(self, node):
+        """Check if current time is within the node_update_timeout grace period
+
+        :returns: True if current time is less than node_update_timeout since
+                  last node update action. False otherwise.
+        """
+
+        node_last_updated = node.updated_at or node.init_at
+        if timeutils.is_older_than(node_last_updated,
+                                   self.node_update_timeout):
+            # node was last updated more than node_update_timeout seconds ago
+            # -> we are outside the grace period
+            LOG.info("%s was updated at %s which is more "
+                     "than %d secs ago. Mark node as unhealthy.",
+                     node.name, node_last_updated,
+                     self.node_update_timeout)
+            return False
+        else:
+            # node was last updated less than node_update_timeout seconds ago
+            # -> we are inside the grace period
+            LOG.info("%s was updated at %s which is less "
+                     "than %d secs ago. Mark node as healthy.",
+                     node.name, node_last_updated,
+                     self.node_update_timeout)
+            return True
+

 class NodePollStatusHealthCheck(HealthCheckType):
     def run_health_check(self, ctx, node):
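The grace-period logic above hinges on oslo.utils' timeutils.is_older_than;
a small sketch of the semantics assumed (the timeout value is illustrative):

    from oslo_utils import timeutils

    node_update_timeout = 300  # seconds; illustrative value
    last_updated = timeutils.utcnow()

    # is_older_than(ts, seconds) is True once more than `seconds` have
    # elapsed since `ts`. Inside the grace period it is False, so the node
    # is still reported as healthy while it warms up.
    within_grace = not timeutils.is_older_than(last_updated,
                                               node_update_timeout)
    print(within_grace)  # True immediately after an update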
@@ -248,34 +275,26 @@ class NodePollStatusHealthCheck(HealthCheckType):

         :returns: True if node is healthy. False otherwise.
         """

         try:
             # create engine node from db node
             entity = node_mod.Node._from_object(ctx, node)

-            if not entity.do_check(ctx, return_check_result=True):
-                # server was not found as a result of performing check
-                node_last_updated = node.updated_at or node.init_at
-                if not timeutils.is_older_than(
-                        node_last_updated, self.node_update_timeout):
-                    LOG.info("Node %s was updated at %s which is less "
-                             "than %d secs ago. Skip node recovery from "
-                             "NodePollStatusHealthCheck.",
-                             node.id, node_last_updated,
-                             self.node_update_timeout)
-                    return True
-                else:
-                    return False
-            else:
-                LOG.debug("NodePollStatusHealthCheck reports node %s is "
-                          "healthy.", node.id)
-                return True
+            # If health check returns True, return True to mark node as
+            # healthy. Else return True to mark node as healthy if we are still
+            # within the node's grace period to allow the node to warm-up.
+            # Return False to mark the node as unhealthy if we are outside the
+            # grace period.
+            return (entity.do_healthcheck(ctx) or
+                    self._node_within_grace_period(node))
         except Exception as ex:
             LOG.warning(
                 'Error when performing health check on node %s: %s',
                 node.id, ex
             )
-            return False
+
+            # treat node as healthy when an exception is encountered
+            return True


 class NodePollUrlHealthCheck(HealthCheckType):
@@ -299,74 +318,88 @@ class NodePollUrlHealthCheck(HealthCheckType):

         return url

-    def run_health_check(self, ctx, node):
-        """Routine to check a node status from a url and recovery if necessary
-
-        :param node: The node to be checked.
-        :returns: True if node is considered to be healthy. False otherwise.
-        """
-
-        url_template = self.params['poll_url']
+    def _poll_url(self, url, node):
         verify_ssl = self.params['poll_url_ssl_verify']
         conn_error_as_unhealthy = self.params[
             'poll_url_conn_error_as_unhealthy']
         expected_resp_str = self.params['poll_url_healthy_response']
+        retry_interval = self.params['poll_url_retry_interval']
+
+        timeout = max(retry_interval * 0.1, 1)
+
+        try:
+            result = utils.url_fetch(url, timeout=timeout,
+                                     verify=verify_ssl)
+        except Exception as ex:
+            if conn_error_as_unhealthy:
+                LOG.info('%s for %s: connection error when polling URL (%s)',
+                         consts.POLL_URL_FAIL, node.name, ex)
+                return False
+            else:
+                LOG.info('%s for %s: ignoring connection error when polling '
+                         'URL (%s)',
+                         consts.POLL_URL_PASS, node.name, ex)
+                return True
+
+        if not re.search(expected_resp_str, result):
+            LOG.info('%s for %s: did not find expected response string %s in '
+                     'URL result (%s)',
+                     consts.POLL_URL_FAIL, node.name, expected_resp_str,
+                     result)
+            return False
+
+        LOG.info('%s for %s: matched expected response string.',
+                 consts.POLL_URL_PASS, node.name)
+        return True
+
+    def run_health_check(self, ctx, node):
+        """Routine to check a node status from a url and recovery if necessary
+
+        :param node: The node to be checked.
+        :returns: True if node is healthy. False otherwise.
+        """
+
         max_unhealthy_retry = self.params['poll_url_retry_limit']
         retry_interval = self.params['poll_url_retry_interval']

-        def stop_node_recovery():
-            node_last_updated = node.updated_at or node.init_at
-            if not timeutils.is_older_than(
-                    node_last_updated, self.node_update_timeout):
-                LOG.info("Node %s was updated at %s which is less than "
-                         "%d secs ago. Skip node recovery from "
-                         "NodePollUrlHealthCheck.",
-                         node.id, node_last_updated, self.node_update_timeout)
-                return True
-
-            LOG.info("Node %s is reported as down (%d retries left)",
-                     node.id, available_attemps)
-            time.sleep(retry_interval)
-
-            return False
-
-        if node.status != consts.NS_ACTIVE:
-            LOG.info("Skip node recovery because node %s is not in "
-                     "ACTIVE state.", node.id)
-            return True
-
-        url = self._expand_url_template(url_template, node)
-        LOG.debug("Polling node status from URL: %s", url)
-
-        available_attemps = max_unhealthy_retry
-        timeout = max(retry_interval * 0.1, 1)
-        while available_attemps > 0:
-            available_attemps -= 1
-
-            try:
-                result = utils.url_fetch(
-                    url, timeout=timeout, verify=verify_ssl)
-            except utils.URLFetchError as ex:
-                if conn_error_as_unhealthy:
-                    if stop_node_recovery():
-                        return True
-                    continue
-                else:
-                    LOG.error("Error when requesting node health status from"
-                              " %s: %s", url, ex)
-                    return True
-
-            LOG.debug("Node status returned from URL(%s): %s", url,
-                      result)
-            if re.search(expected_resp_str, result):
-                LOG.debug('NodePollUrlHealthCheck reports node %s is healthy.',
-                          node.id)
-                return True
-
-            if stop_node_recovery():
-                return True
-
-        return False
+        def _return_last_value(retry_state):
+            return retry_state.outcome.result()
+
+        @tenacity.retry(
+            retry=tenacity.retry_if_result(lambda x: x is False),
+            wait=tenacity.wait_fixed(retry_interval),
+            retry_error_callback=_return_last_value,
+            stop=tenacity.stop_after_attempt(max_unhealthy_retry)
+        )
+        def _poll_url_with_retry(url):
+            return self._poll_url(url, node)
+
+        try:
+            if node.status != consts.NS_ACTIVE:
+                LOG.info('%s for %s: node is not in ACTIVE state, so skip '
+                         'poll url',
+                         consts.POLL_URL_PASS, node.name)
+                return True
+
+            url_template = self.params['poll_url']
+            url = self._expand_url_template(url_template, node)
+
+            # If health check returns True, return True to mark node as
+            # healthy. Else return True to mark node as healthy if we are still
+            # within the node's grace period to allow the node to warm-up.
+            # Return False to mark the node as unhealthy if we are outside the
+            # grace period.
+            return (_poll_url_with_retry(url) or
+                    self._node_within_grace_period(node))
+        except Exception as ex:
+            LOG.warning(
+                '%s for %s: Ignoring error on poll URL: %s',
+                consts.POLL_URL_PASS, node.name, ex
+            )
+
+            # treat node as healthy when an exception is encountered
+            return True


 class HealthManager(service.Service):
@@ -428,8 +461,6 @@ class HealthManager(service.Service):
         :returns: Recover action
         """
         try:
-            LOG.info("%s is requesting node recovery "
-                     "for %s.", self.__class__.__name__, node_id)
             req = objects.NodeRecoverRequest(identity=node_id,
                                              params=recover_action)

@@ -516,6 +547,9 @@ class HealthManager(service.Service):
                                              recovery_cond))

             if not node_is_healthy:
+                LOG.info("Health check failed for %s in %s and "
+                         "recovery has started.",
+                         node.name, cluster.name)
                 action = self._recover_node(node.id, ctx,
                                             recover_action)
                 actions.append(action)
@@ -529,7 +563,7 @@ class HealthManager(service.Service):
                                 "within specified timeout: %s", a['action'],
                                 reason)

-            if len(actions) > 0:
+            if len(actions) == 0:
                 LOG.info('Health check passed for all nodes in cluster %s.',
                          cluster_id)
         except Exception as ex:

senlin/engine/node.py
@@ -316,7 +316,7 @@ class Node(object):
             self.index = -1
         return True

-    def do_check(self, context, return_check_result=False):
+    def do_check(self, context):
         if not self.physical_id:
             return False

@@ -330,9 +330,6 @@ class Node(object):
             self.set_status(context, consts.NS_ERROR, six.text_type(ex))
             return False

-        if return_check_result:
-            return res
-
         # Physical object is ACTIVE but for some reason the node status in
         # senlin was WARNING. We only update the status_reason
         if res:
@@ -350,6 +347,17 @@ class Node(object):

         return True

+    def do_healthcheck(self, context):
+        """health check a node.
+
+        This function is supposed to be invoked from the health manager to
+        check the health of a given node
+        :param context: The request context of the action.
+        :returns: True if node is healthy. False otherwise.
+        """
+
+        return pb.Profile.healthcheck_object(context, self)
+
     def do_recover(self, context, action):
         """recover a node.

@@ -358,10 +366,19 @@ class Node(object):
         :param dict options: A map containing the recovery actions (with
                              parameters if any) and fencing settings.
         """
-        if not self.physical_id:
+        options = action.inputs
+
+        operations = options.get('operation', [{'name': ''}])
+        reboot_ops = [op for op in operations
+                      if op.get('name') == consts.RECOVER_REBOOT]
+        rebuild_ops = [op for op in operations
+                       if op.get('name') == consts.RECOVER_REBUILD]
+        if not self.physical_id and (reboot_ops or rebuild_ops):
+            # physical id is required for REBOOT or REBUILD operations
+            LOG.warning('Recovery failed because node has no physical id'
+                        ' was provided for reboot or rebuild operation.')
             return False

-        options = action.inputs
         if options.get('check', False):
             res = False
             try:

senlin/profiles/base.py
@@ -301,11 +301,13 @@ class Profile(object):
     @profiler.trace('Profile.check_object', hide_args=False)
     def check_object(cls, ctx, obj):
         profile = cls.load(ctx, profile_id=obj.profile_id)
-        try:
-            return profile.do_check(obj)
-        except exc.InternalError as ex:
-            LOG.debug(ex)
-            return False
+        return profile.do_check(obj)
+
+    @classmethod
+    @profiler.trace('Profile.check_object', hide_args=False)
+    def healthcheck_object(cls, ctx, obj):
+        profile = cls.load(ctx, profile_id=obj.profile_id)
+        return profile.do_healthcheck(obj)

     @classmethod
     @profiler.trace('Profile.recover_object', hide_args=False)
@@ -461,6 +463,18 @@ class Profile(object):
         LOG.warning("Check operation not supported.")
         return True

+    def do_healthcheck(self, obj):
+        """Default healthcheck operation.
+
+        This is provided as a fallback if a specific profile type does not
+        override this method.
+
+        :param obj: The node object to operate on.
+        :return status: True indicates node is healthy, False indicates
+                        it is unhealthy.
+        """
+        return self.do_check(obj)
+
     def do_get_details(self, obj):
         """For subclass to override."""
         LOG.warning("Get_details operation not supported.")

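The do_healthcheck fallback above means a profile type only overrides the
method when its health semantics differ from do_check; a sketch of that
override pattern (the class and status names here are illustrative, not
Senlin's):

    class BaseProfile(object):
        def do_check(self, obj):
            # existing per-profile check logic
            return True

        def do_healthcheck(self, obj):
            # Fallback: health is simply the regular check result.
            return self.do_check(obj)

    class IllustrativeServerProfile(BaseProfile):
        def do_healthcheck(self, obj):
            # A server-style profile can tighten the definition of
            # "healthy", e.g. by inspecting the backing resource's status.
            unhealthy = ('ERROR', 'SHUTOFF', 'DELETED')
            return getattr(obj, 'status', None) not in unhealthy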
senlin/profiles/os/nova/server.py
@@ -1532,7 +1532,7 @@ class ServerProfile(base.Profile):
         try:
             server = self.compute(obj).server_get(obj.physical_id)
         except exc.InternalError as ex:
-            if "No Server found" in six.text_type(ex):
+            if ex.code == 404:
                 raise exc.EServerNotFound(type='server',
                                           id=obj.physical_id,
                                           message=six.text_type(ex))
@@ -1546,6 +1546,60 @@ class ServerProfile(base.Profile):

         return True

+    def do_healthcheck(self, obj):
+        """Healthcheck operation.
+
+        This method checks if a server node is healthy by getting the server
+        status from nova. A server is considered unhealthy if it does not
+        exist or its status is one of the following:
+        - ERROR
+        - SHUTOFF
+        - DELETED
+
+        :param obj: The node object to operate on.
+        :return status: True indicates node is healthy, False indicates
+                        it is unhealthy.
+        """
+        unhealthy_server_status = [consts.VS_ERROR, consts.VS_SHUTOFF,
+                                   consts.VS_DELETED]
+
+        if not obj.physical_id:
+            LOG.info('%s for %s: server has no physical ID.',
+                     consts.POLL_STATUS_FAIL, obj.name)
+            return False
+
+        try:
+            server = self.compute(obj).server_get(obj.physical_id)
+        except Exception as ex:
+            if isinstance(ex, exc.InternalError) and ex.code == 404:
+                # treat resource not found exception as unhealthy
+                LOG.info('%s for %s: server was not found.',
+                         consts.POLL_STATUS_FAIL, obj.name)
+                return False
+            else:
+                # treat all other exceptions as healthy
+                LOG.info(
+                    '%s for %s: Exception when trying to get server info but '
+                    'ignoring this error: %s.',
+                    consts.POLL_STATUS_PASS, obj.name, ex.message)
+                return True
+
+        if server is None:
+            # no server information is available, treat the node as healthy
+            LOG.info(
+                '%s for %s: No server information was returned but ignoring '
+                'this error.',
+                consts.POLL_STATUS_PASS, obj.name)
+            return True
+
+        if server.status in unhealthy_server_status:
+            LOG.info('%s for %s: server status is unhealthy.',
+                     consts.POLL_STATUS_FAIL, obj.name)
+            return False
+
+        LOG.info('%s for %s', consts.POLL_STATUS_PASS, obj.name)
+        return True
+
     def do_recover(self, obj, **options):
         """Handler for recover operation.

senlin/tests/unit/engine/test_health_manager.py
@@ -21,6 +21,7 @@ from oslo_utils import timeutils as tu

 from senlin.common import consts
 from senlin.common import context
+from senlin.common import exception as exc
 from senlin.common import messaging
 from senlin.common import utils
 from senlin.engine import health_manager as hm
@@ -618,7 +619,27 @@ class TestNodePollStatusHealthCheck(base.SenlinTestCase):
     @mock.patch.object(tu, 'is_older_than')
     def test_run_health_check_healthy(self, mock_tu, mock_node_obj):
         x_entity = mock.Mock()
-        x_entity.do_check.return_value = True
+        x_entity.do_healthcheck.return_value = True
         mock_node_obj.return_value = x_entity

         ctx = mock.Mock()
+        node = mock.Mock(id='FAKE_NODE1', status="ERROR",
+                         updated_at='2018-08-13 18:00:00',
+                         init_at='2018-08-13 17:00:00')
+
+        # do it
+        res = self.hc.run_health_check(ctx, node)
+
+        self.assertTrue(res)
+        mock_tu.assert_not_called()
+
+    @mock.patch.object(node_mod.Node, '_from_object')
+    @mock.patch.object(tu, 'is_older_than')
+    def test_run_health_check_healthy_internal_error(
+            self, mock_tu, mock_node_obj):
+        x_entity = mock.Mock()
+        x_entity.do_healthcheck.side_effect = exc.InternalError(
+            message='error')
+        mock_node_obj.return_value = x_entity
+
+        ctx = mock.Mock()
@@ -636,7 +657,7 @@ class TestNodePollStatusHealthCheck(base.SenlinTestCase):
     @mock.patch.object(tu, 'is_older_than')
     def test_run_health_check_unhealthy(self, mock_tu, mock_node_obj):
         x_entity = mock.Mock()
-        x_entity.do_check.return_value = False
+        x_entity.do_healthcheck.return_value = False
         mock_node_obj.return_value = x_entity

         mock_tu.return_value = True
@@ -657,7 +678,7 @@ class TestNodePollStatusHealthCheck(base.SenlinTestCase):
     def test_run_health_check_unhealthy_within_timeout(
             self, mock_tu, mock_node_obj):
         x_entity = mock.Mock()
-        x_entity.do_check.return_value = False
+        x_entity.do_healthcheck.return_value = False
         mock_node_obj.return_value = x_entity

         mock_tu.return_value = False
@@ -793,8 +814,7 @@ class TestNodePollUrlHealthCheck(base.SenlinTestCase):
         res = self.hc.run_health_check(ctx, node)

         self.assertTrue(res)
-        mock_url_fetch.assert_called_once_with('FAKE_EXPANDED_URL', timeout=1,
-                                               verify=True)
+        mock_url_fetch.assert_not_called()

     @mock.patch.object(tu, "is_older_than")
     @mock.patch.object(hm.NodePollUrlHealthCheck, "_expand_url_template")
@@ -814,8 +834,8 @@ class TestNodePollUrlHealthCheck(base.SenlinTestCase):
         res = self.hc.run_health_check(ctx, node)

         self.assertTrue(res)
-        mock_url_fetch.assert_called_once_with('FAKE_EXPANDED_URL', timeout=1,
-                                               verify=True)
+        mock_url_fetch.assert_has_calls(
+            [mock.call('FAKE_EXPANDED_URL', timeout=1, verify=True)])

     @mock.patch.object(tu, "is_older_than")
     @mock.patch.object(hm.NodePollUrlHealthCheck, "_expand_url_template")
@@ -836,17 +856,14 @@ class TestNodePollUrlHealthCheck(base.SenlinTestCase):
         res = self.hc.run_health_check(ctx, node)

         self.assertTrue(res)
-        mock_url_fetch.assert_called_once_with('FAKE_EXPANDED_URL', timeout=1,
-                                               verify=True)
+        mock_url_fetch.assert_has_calls(
+            [mock.call('FAKE_EXPANDED_URL', timeout=1, verify=True)])

-    @mock.patch.object(time, "sleep")
     @mock.patch.object(tu, "is_older_than")
     @mock.patch.object(hm.NodePollUrlHealthCheck, "_expand_url_template")
     @mock.patch.object(utils, 'url_fetch')
-    def test_run_health_check_unhealthy(self,
-                                        mock_url_fetch,
-                                        mock_expand_url, mock_time,
-                                        mock_sleep):
+    def test_run_health_check_unhealthy(self, mock_url_fetch, mock_expand_url,
+                                        mock_time):
         ctx = mock.Mock()
         node = mock.Mock()
         node.status = consts.NS_ACTIVE
@@ -865,16 +882,13 @@ class TestNodePollUrlHealthCheck(base.SenlinTestCase):
                 mock.call('FAKE_EXPANDED_URL', timeout=1, verify=True)
             ]
         )
-        mock_sleep.assert_has_calls([mock.call(1), mock.call(1)])

-    @mock.patch.object(time, "sleep")
     @mock.patch.object(tu, "is_older_than")
     @mock.patch.object(hm.NodePollUrlHealthCheck, "_expand_url_template")
     @mock.patch.object(utils, 'url_fetch')
     def test_run_health_check_conn_error(self,
                                          mock_url_fetch,
-                                         mock_expand_url, mock_time,
-                                         mock_sleep):
+                                         mock_expand_url, mock_time):
         ctx = mock.Mock()
         node = mock.Mock()
         node.status = consts.NS_ACTIVE
@@ -893,15 +907,31 @@ class TestNodePollUrlHealthCheck(base.SenlinTestCase):
                 mock.call('FAKE_EXPANDED_URL', timeout=1, verify=True)
             ]
         )
-        mock_sleep.assert_has_calls([mock.call(1), mock.call(1)])

-    @mock.patch.object(time, "sleep")
+    @mock.patch.object(tu, "is_older_than")
+    @mock.patch.object(hm.NodePollUrlHealthCheck, "_expand_url_template")
+    @mock.patch.object(utils, 'url_fetch')
+    def test_run_health_check_conn_other_error(self,
+                                               mock_url_fetch,
+                                               mock_expand_url, mock_time):
+        ctx = mock.Mock()
+        node = mock.Mock()
+        node.status = consts.NS_ACTIVE
+        node.id = 'FAKE_ID'
+        mock_time.return_value = True
+        mock_expand_url.side_effect = Exception('blah')
+
+        # do it
+        res = self.hc.run_health_check(ctx, node)
+
+        self.assertTrue(res)
+        mock_url_fetch.assert_not_called()
+
     @mock.patch.object(tu, "is_older_than")
     @mock.patch.object(hm.NodePollUrlHealthCheck, "_expand_url_template")
     @mock.patch.object(utils, 'url_fetch')
     def test_run_health_check_conn_error_noop(
-            self, mock_url_fetch, mock_expand_url, mock_time,
-            mock_sleep):
+            self, mock_url_fetch, mock_expand_url, mock_time):
         ctx = mock.Mock()
         node = mock.Mock()
         node.status = consts.NS_ACTIVE
@@ -921,7 +951,6 @@ class TestNodePollUrlHealthCheck(base.SenlinTestCase):
                 mock.call('FAKE_EXPANDED_URL', timeout=1, verify=True),
             ]
         )
-        mock_sleep.assert_not_called()


 class TestHealthManager(base.SenlinTestCase):

senlin/tests/unit/engine/test_node.py
@@ -594,6 +594,24 @@ class TestNode(base.SenlinTestCase):
                          % node.physical_id,
                          physical_id=None)

+    @mock.patch.object(pb.Profile, 'healthcheck_object')
+    def test_node_healthcheck(self, mock_healthcheck):
+        node = nodem.Node('node1', PROFILE_ID, '')
+        node.status = consts.NS_ACTIVE
+        node.physical_id = 'd94d6333-82e6-4f87-b7ab-b786776df9d1'
+        mock_healthcheck.return_value = True
+        res = node.do_healthcheck(self.context)
+
+        self.assertTrue(res)
+        mock_healthcheck.assert_called_once_with(self.context, node)
+
+    def test_node_healthcheck_no_physical_id(self):
+        node = nodem.Node('node1', PROFILE_ID, '')
+
+        res = node.do_healthcheck(self.context)
+
+        self.assertFalse(res)
+
     @mock.patch.object(nodem.Node, 'set_status')
     @mock.patch.object(pb.Profile, 'recover_object')
     def test_node_recover_new_object(self, mock_recover, mock_status):
@@ -611,7 +629,7 @@ class TestNode(base.SenlinTestCase):
         mock_recover.return_value = new_id, True
         mock_status.side_effect = set_status
         action = mock.Mock()
-        action.inputs = {'operation': ['SWIM', 'DANCE']}
+        action.inputs = {'operation': [{'SWIM': 1, 'DANCE': 2}]}

         res = node.do_recover(self.context, action)

@@ -793,7 +811,7 @@ class TestNode(base.SenlinTestCase):
             id=node.physical_id,
             reason='Boom!'
         )
-        action = mock.Mock(inputs={'operation': ['boom'],
+        action = mock.Mock(inputs={'operation': [{'boom': 1}],
                                    'check': True})

         res = node.do_recover(self.context, action)
@@ -830,14 +848,93 @@ class TestNode(base.SenlinTestCase):
             mock.call(self.context, consts.NS_ERROR,
                       reason='Recovery failed')])

-    def test_node_recover_no_physical_id(self):
+    def test_node_recover_no_physical_id_reboot_op(self):
         node = nodem.Node('node1', PROFILE_ID, None)
-        action = mock.Mock()
+        action = mock.Mock(inputs={'operation': [{'name': 'REBOOT'}]})

         res = node.do_recover(self.context, action)

         self.assertFalse(res)

+    def test_node_recover_no_physical_id_rebuild_op(self):
+        node = nodem.Node('node1', PROFILE_ID, None)
+        action = mock.Mock(inputs={'operation': [{'name': 'REBUILD'}]})
+
+        res = node.do_recover(self.context, action)
+
+        self.assertFalse(res)
+
+    @mock.patch.object(nodem.Node, 'set_status')
+    @mock.patch.object(pb.Profile, 'recover_object')
+    def test_node_recover_no_physical_id_no_op(self, mock_recover,
+                                               mock_status):
+        def set_status(*args, **kwargs):
+            if args[1] == 'ACTIVE':
+                node.physical_id = new_id
+                node.data = {'recovery': 'RECREATE'}
+
+        node = nodem.Node('node1', PROFILE_ID, '', id='fake')
+        new_id = '166db83b-b4a4-49ef-96a8-6c0fdd882d1a'
+        mock_recover.return_value = new_id, True
+        mock_status.side_effect = set_status
+        mock_check = self.patchobject(pb.Profile, 'check_object')
+        mock_check.return_value = False
+        action = mock.Mock(
+            outputs={}, inputs={})
+
+        res = node.do_recover(self.context, action)
+
+        self.assertTrue(res)
+        mock_check.assert_not_called()
+        mock_recover.assert_called_once_with(
+            self.context, node, **action.inputs)
+        self.assertEqual('node1', node.name)
+        self.assertEqual(new_id, node.physical_id)
+        self.assertEqual(PROFILE_ID, node.profile_id)
+        mock_status.assert_has_calls([
+            mock.call(self.context, 'RECOVERING',
+                      reason='Recovery in progress'),
+            mock.call(self.context, consts.NS_ACTIVE,
+                      reason='Recovery succeeded',
+                      physical_id=new_id,
+                      data={'recovery': 'RECREATE'})])
+
+    @mock.patch.object(nodem.Node, 'set_status')
+    @mock.patch.object(pb.Profile, 'recover_object')
+    def test_node_recover_no_physical_id_recreate_op(self, mock_recover,
+                                                     mock_status):
+        def set_status(*args, **kwargs):
+            if args[1] == 'ACTIVE':
+                node.physical_id = new_id
+                node.data = {'recovery': 'RECREATE'}
+
+        node = nodem.Node('node1', PROFILE_ID, '', id='fake')
+        new_id = '166db83b-b4a4-49ef-96a8-6c0fdd882d1a'
+        mock_recover.return_value = new_id, True
+        mock_status.side_effect = set_status
+        mock_check = self.patchobject(pb.Profile, 'check_object')
+        mock_check.return_value = False
+        action = mock.Mock(
+            outputs={}, inputs={'operation': [{'name': 'RECREATE'}],
+                                'check': True})
+
+        res = node.do_recover(self.context, action)
+
+        self.assertTrue(res)
+        mock_check.assert_called_once_with(self.context, node)
+        mock_recover.assert_called_once_with(
+            self.context, node, **action.inputs)
+        self.assertEqual('node1', node.name)
+        self.assertEqual(new_id, node.physical_id)
+        self.assertEqual(PROFILE_ID, node.profile_id)
+        mock_status.assert_has_calls([
+            mock.call(self.context, 'RECOVERING',
+                      reason='Recovery in progress'),
+            mock.call(self.context, consts.NS_ACTIVE,
+                      reason='Recovery succeeded',
+                      physical_id=new_id,
+                      data={'recovery': 'RECREATE'})])
+
     @mock.patch.object(nodem.Node, 'set_status')
     def test_node_recover_operation_not_support(self, mock_set_status):
         node = nodem.Node('node1', PROFILE_ID, None)

senlin/tests/unit/profiles/test_nova_server.py
@@ -1369,6 +1369,76 @@ class TestNovaServerBasic(base.SenlinTestCase):
                          six.text_type(ex))
         cc.server_get.assert_called_once_with('FAKE_ID')

+    def test_do_healthcheck_active(self):
+        profile = server.ServerProfile('t', self.spec)
+
+        cc = mock.Mock()
+        cc.server_get.return_value = mock.Mock(status='ACTIVE')
+        profile._computeclient = cc
+
+        test_server = mock.Mock(physical_id='FAKE_ID')
+
+        res = profile.do_healthcheck(test_server)
+        cc.server_get.assert_called_once_with('FAKE_ID')
+        self.assertTrue(res)
+
+    def test_do_healthcheck_empty_server_obj(self):
+        profile = server.ServerProfile('t', self.spec)
+
+        cc = mock.Mock()
+        cc.server_get.return_value = None
+        profile._computeclient = cc
+
+        test_server = mock.Mock(physical_id='FAKE_ID')
+
+        res = profile.do_healthcheck(test_server)
+        cc.server_get.assert_called_once_with('FAKE_ID')
+        self.assertTrue(res)
+
+    def test_do_healthcheck_exception(self):
+        profile = server.ServerProfile('t', self.spec)
+
+        cc = mock.Mock()
+        ex = exc.InternalError(code=503, message='Error')
+        cc.server_get.side_effect = ex
+        profile._computeclient = cc
+
+        test_server = mock.Mock(physical_id='FAKE_ID')
+
+        res = profile.do_healthcheck(test_server)
+
+        cc.server_get.assert_called_once_with('FAKE_ID')
+        self.assertTrue(res)
+
+    def test_do_healthcheck_error(self):
+        profile = server.ServerProfile('t', self.spec)
+
+        cc = mock.Mock()
+        cc.server_get.return_value = mock.Mock(status='ERROR')
+        profile._computeclient = cc
+
+        test_server = mock.Mock(physical_id='FAKE_ID')
+
+        res = profile.do_healthcheck(test_server)
+
+        cc.server_get.assert_called_once_with('FAKE_ID')
+        self.assertFalse(res)
+
+    def test_do_healthcheck_server_not_found(self):
+        profile = server.ServerProfile('t', self.spec)
+
+        cc = mock.Mock()
+        ex = exc.InternalError(code=404, message='No Server found')
+        cc.server_get.side_effect = ex
+        profile._computeclient = cc
+
+        test_server = mock.Mock(physical_id='FAKE_ID')
+
+        res = profile.do_healthcheck(test_server)
+
+        cc.server_get.assert_called_once_with('FAKE_ID')
+        self.assertFalse(res)
+
     @mock.patch.object(server.ServerProfile, 'do_delete')
     @mock.patch.object(server.ServerProfile, 'do_create')
     def test_do_recover_operation_is_none(self, mock_create, mock_delete):

senlin/tests/unit/profiles/test_profile_base.py
@@ -441,10 +441,10 @@ class TestProfileBase(base.SenlinTestCase):
             side_effect=exception.InternalError(code=400, message='BAD'))
         obj = mock_load

-        res = profile.check_object(self.ctx, obj)
+        self.assertRaises(exception.InternalError, profile.check_object,
+                          self.ctx, obj)

         profile.load(self.ctx).do_check.assert_called_once_with(obj)
-        self.assertFalse(res)

     @mock.patch.object(pb.Profile, 'load')
     def test_update_object_with_profile(self, mock_load):