Merge "Implement auto_priority and rh_priority recovery_methods"
This commit is contained in:
commit
2525205c24
|
@ -20,6 +20,7 @@ Driver TaskFlowDriver:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
from oslo_utils import excutils
|
||||||
|
|
||||||
from masakari.compute import nova
|
from masakari.compute import nova
|
||||||
from masakari.engine import driver
|
from masakari.engine import driver
|
||||||
|
@ -39,6 +40,81 @@ class TaskFlowDriver(driver.NotificationDriver):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(TaskFlowDriver, self).__init__()
|
super(TaskFlowDriver, self).__init__()
|
||||||
|
|
||||||
|
def _execute_auto_workflow(self, novaclient, process_what):
|
||||||
|
flow_engine = host_failure.get_auto_flow(novaclient, process_what)
|
||||||
|
|
||||||
|
# Attaching this listener will capture all of the notifications
|
||||||
|
# that taskflow sends out and redirect them to a more useful
|
||||||
|
# log for masakari's debugging (or error reporting) usage.
|
||||||
|
with base.DynamicLogListener(flow_engine, logger=LOG):
|
||||||
|
flow_engine.run()
|
||||||
|
|
||||||
|
def _execute_rh_workflow(self, novaclient, process_what,
|
||||||
|
reserved_host_list):
|
||||||
|
if not reserved_host_list:
|
||||||
|
msg = _('No reserved_hosts available for evacuation.')
|
||||||
|
LOG.info(msg)
|
||||||
|
raise exception.ReservedHostsUnavailable(message=msg)
|
||||||
|
|
||||||
|
process_what['reserved_host_list'] = reserved_host_list
|
||||||
|
flow_engine = host_failure.get_rh_flow(novaclient, process_what)
|
||||||
|
|
||||||
|
with base.DynamicLogListener(flow_engine, logger=LOG):
|
||||||
|
try:
|
||||||
|
flow_engine.run()
|
||||||
|
except exception.LockAlreadyAcquired as ex:
|
||||||
|
raise exception.HostRecoveryFailureException(ex.message)
|
||||||
|
|
||||||
|
def _execute_auto_priority_workflow(self, novaclient, process_what,
|
||||||
|
reserved_host_list):
|
||||||
|
try:
|
||||||
|
self._execute_auto_workflow(novaclient, process_what)
|
||||||
|
except Exception as ex:
|
||||||
|
with excutils.save_and_reraise_exception(reraise=False) as ctxt:
|
||||||
|
if isinstance(ex, exception.SkipHostRecoveryException):
|
||||||
|
ctxt.reraise = True
|
||||||
|
return
|
||||||
|
|
||||||
|
# Caught generic Exception to make sure that any failure
|
||||||
|
# should lead to execute 'reserved_host' recovery workflow.
|
||||||
|
msg = _LW("Failed to evacuate all instances from "
|
||||||
|
"failed_host: '%(failed_host)s' using "
|
||||||
|
"'%(auto)s' workflow, retrying using "
|
||||||
|
"'%(reserved_host)s' workflow.")
|
||||||
|
LOG.warning(msg, {
|
||||||
|
'failed_host': process_what['host_name'],
|
||||||
|
'auto': fields.FailoverSegmentRecoveryMethod.AUTO,
|
||||||
|
'reserved_host':
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RESERVED_HOST
|
||||||
|
})
|
||||||
|
self._execute_rh_workflow(novaclient, process_what,
|
||||||
|
reserved_host_list)
|
||||||
|
|
||||||
|
def _execute_rh_priority_workflow(self, novaclient, process_what,
|
||||||
|
reserved_host_list):
|
||||||
|
try:
|
||||||
|
self._execute_rh_workflow(novaclient, process_what,
|
||||||
|
reserved_host_list)
|
||||||
|
except Exception as ex:
|
||||||
|
with excutils.save_and_reraise_exception(reraise=False) as ctxt:
|
||||||
|
if isinstance(ex, exception.SkipHostRecoveryException):
|
||||||
|
ctxt.reraise = True
|
||||||
|
return
|
||||||
|
|
||||||
|
# Caught generic Exception to make sure that any failure
|
||||||
|
# should lead to execute 'auto' recovery workflow.
|
||||||
|
msg = _LW("Failed to evacuate all instances from "
|
||||||
|
"failed_host '%(failed_host)s' using "
|
||||||
|
"'%(reserved_host)s' workflow, retrying using "
|
||||||
|
"'%(auto)s' workflow")
|
||||||
|
LOG.warning(msg, {
|
||||||
|
'failed_host': process_what['host_name'],
|
||||||
|
'reserved_host':
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RESERVED_HOST,
|
||||||
|
'auto': fields.FailoverSegmentRecoveryMethod.AUTO
|
||||||
|
})
|
||||||
|
self._execute_auto_workflow(novaclient, process_what)
|
||||||
|
|
||||||
def execute_host_failure(self, context, host_name, recovery_method,
|
def execute_host_failure(self, context, host_name, recovery_method,
|
||||||
notification_uuid, reserved_host_list=None):
|
notification_uuid, reserved_host_list=None):
|
||||||
novaclient = nova.API()
|
novaclient = nova.API()
|
||||||
|
@ -50,42 +126,28 @@ class TaskFlowDriver(driver.NotificationDriver):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if recovery_method == fields.FailoverSegmentRecoveryMethod.AUTO:
|
if recovery_method == fields.FailoverSegmentRecoveryMethod.AUTO:
|
||||||
flow_engine = host_failure.get_auto_flow(novaclient,
|
self._execute_auto_workflow(novaclient, process_what)
|
||||||
process_what)
|
|
||||||
elif recovery_method == (
|
elif recovery_method == (
|
||||||
fields.FailoverSegmentRecoveryMethod.RESERVED_HOST):
|
fields.FailoverSegmentRecoveryMethod.RESERVED_HOST):
|
||||||
if not reserved_host_list:
|
self._execute_rh_workflow(novaclient, process_what,
|
||||||
msg = _('No reserved_hosts available for evacuation.')
|
reserved_host_list)
|
||||||
LOG.info(msg)
|
|
||||||
raise exception.ReservedHostsUnavailable(message=msg)
|
|
||||||
|
|
||||||
process_what['reserved_host_list'] = reserved_host_list
|
|
||||||
flow_engine = host_failure.get_rh_flow(
|
|
||||||
novaclient, process_what)
|
|
||||||
elif recovery_method == (
|
elif recovery_method == (
|
||||||
fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY):
|
fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY):
|
||||||
raise NotImplementedError(_("Flow not implemented for "
|
self._execute_auto_priority_workflow(novaclient, process_what,
|
||||||
"recovery_method"),
|
reserved_host_list)
|
||||||
recovery_method)
|
else:
|
||||||
elif recovery_method == (
|
self._execute_rh_priority_workflow(novaclient, process_what,
|
||||||
fields.FailoverSegmentRecoveryMethod.RH_PRIORITY):
|
reserved_host_list)
|
||||||
raise NotImplementedError(_("Flow not implemented for "
|
except Exception as exc:
|
||||||
"recovery_method"),
|
with excutils.save_and_reraise_exception(reraise=False) as ctxt:
|
||||||
recovery_method)
|
if isinstance(exc, (exception.SkipHostRecoveryException,
|
||||||
except Exception:
|
exception.HostRecoveryFailureException,
|
||||||
msg = (_('Failed to create host failure flow.'),
|
exception.ReservedHostsUnavailable)):
|
||||||
notification_uuid)
|
ctxt.reraise = True
|
||||||
LOG.exception(msg)
|
return
|
||||||
raise exception.MasakariException(msg)
|
msg = _("Failed to execute host failure flow for "
|
||||||
|
"notification '%s'.") % notification_uuid
|
||||||
# Attaching this listener will capture all of the notifications that
|
raise exception.MasakariException(msg)
|
||||||
# taskflow sends out and redirect them to a more useful log for
|
|
||||||
# masakari's debugging (or error reporting) usage.
|
|
||||||
with base.DynamicLogListener(flow_engine, logger=LOG):
|
|
||||||
try:
|
|
||||||
flow_engine.run()
|
|
||||||
except exception.LockAlreadyAcquired as ex:
|
|
||||||
raise exception.HostRecoveryFailureException(ex.message)
|
|
||||||
|
|
||||||
def execute_instance_failure(self, context, instance_uuid,
|
def execute_instance_failure(self, context, instance_uuid,
|
||||||
notification_uuid):
|
notification_uuid):
|
||||||
|
|
|
@ -0,0 +1,217 @@
|
||||||
|
# Copyright 2017 NTT DATA
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
|
||||||
|
from masakari import context
|
||||||
|
from masakari.engine.drivers.taskflow import base
|
||||||
|
from masakari.engine.drivers.taskflow import driver
|
||||||
|
from masakari.engine.drivers.taskflow import host_failure
|
||||||
|
from masakari import exception
|
||||||
|
from masakari.objects import fields
|
||||||
|
from masakari import test
|
||||||
|
from masakari.tests import uuidsentinel
|
||||||
|
|
||||||
|
|
||||||
|
class FakeFlow(object):
|
||||||
|
"""Fake flow class of taskflow."""
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
# run method which actually runs the flow
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TaskflowDriverTestCase(test.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super(TaskflowDriverTestCase, self).setUp()
|
||||||
|
self.taskflow_driver = driver.TaskFlowDriver()
|
||||||
|
self.ctxt = context.get_admin_context()
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_auto_priority_recovery_flow_auto_success(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
mock_auto_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(return_value=None)
|
||||||
|
self.taskflow_driver.execute_host_failure(
|
||||||
|
self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification, reserved_host_list=[
|
||||||
|
'host-1', 'host-2'])
|
||||||
|
|
||||||
|
# Ensures that 'auto' flow executes successfully
|
||||||
|
self.assertTrue(mock_auto_flow.called)
|
||||||
|
# Ensures that 'reserved_host' flow will not execute
|
||||||
|
self.assertFalse(mock_rh_flow.called)
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_auto_priority_recovery_flow_rh_success(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
mock_auto_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(
|
||||||
|
side_effect=exception.HostRecoveryFailureException)
|
||||||
|
self.taskflow_driver.execute_host_failure(
|
||||||
|
self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification, reserved_host_list=[
|
||||||
|
'host-1', 'host-2'])
|
||||||
|
|
||||||
|
# Ensures that 'auto' flow fails to recover instances
|
||||||
|
self.assertTrue(mock_auto_flow.called)
|
||||||
|
# Ensures that 'reserved_host' flow executes as 'auto' flow fails
|
||||||
|
self.assertTrue(mock_rh_flow.called)
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_rh_priority_recovery_flow_rh_success(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
mock_rh_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(return_value=None)
|
||||||
|
self.taskflow_driver.execute_host_failure(
|
||||||
|
self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RH_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification, reserved_host_list=[
|
||||||
|
'host-1', 'host-2'])
|
||||||
|
|
||||||
|
# Ensures that 'reserved_host' flow executes successfully
|
||||||
|
self.assertTrue(mock_rh_flow.called)
|
||||||
|
# Ensures that 'auto' flow will not execute
|
||||||
|
self.assertFalse(mock_auto_flow.called)
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_rh_priority_recovery_flow_auto_success(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
mock_rh_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(
|
||||||
|
side_effect=exception.HostRecoveryFailureException)
|
||||||
|
self.taskflow_driver.execute_host_failure(
|
||||||
|
self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RH_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification, reserved_host_list=[
|
||||||
|
'host-1', 'host-2'])
|
||||||
|
|
||||||
|
# Ensures that 'reserved_host' flow fails to recover instances
|
||||||
|
self.assertTrue(mock_rh_flow.called)
|
||||||
|
# Ensures that 'auto' flow executes as 'reserved_host' flow fails
|
||||||
|
self.assertTrue(mock_auto_flow.called)
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_complete_auto_priority_recovery_flow_failure(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
|
||||||
|
mock_auto_flow.return_value = FakeFlow
|
||||||
|
mock_rh_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(
|
||||||
|
side_effect=exception.HostRecoveryFailureException)
|
||||||
|
|
||||||
|
# Ensures that both 'auto' and 'reserved_host' flow fails to
|
||||||
|
# evacuate instances
|
||||||
|
self.assertRaises(
|
||||||
|
exception.HostRecoveryFailureException,
|
||||||
|
self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification,
|
||||||
|
reserved_host_list=['host-1', 'host-2'])
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_complete_rh_priority_recovery_flow_failure(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
|
||||||
|
mock_rh_flow.return_value = FakeFlow
|
||||||
|
mock_auto_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(
|
||||||
|
side_effect=exception.HostRecoveryFailureException)
|
||||||
|
|
||||||
|
# Ensures that both 'reserved_host' and 'auto' flow fails to
|
||||||
|
# evacuate instances
|
||||||
|
self.assertRaises(
|
||||||
|
exception.HostRecoveryFailureException,
|
||||||
|
self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RH_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification,
|
||||||
|
reserved_host_list=['host-1', 'host-2'])
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_rh_priority_recovery_flow_skip_recovery(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
|
||||||
|
mock_rh_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(
|
||||||
|
side_effect=exception.SkipHostRecoveryException)
|
||||||
|
|
||||||
|
self.assertRaises(
|
||||||
|
exception.SkipHostRecoveryException,
|
||||||
|
self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RH_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification,
|
||||||
|
reserved_host_list=['host-1', 'host-2'])
|
||||||
|
|
||||||
|
# Ensures that 'reserved_host' flow executes but skip the host
|
||||||
|
# recovery
|
||||||
|
self.assertTrue(mock_rh_flow.called)
|
||||||
|
# Ensures that 'auto' flow will not execute
|
||||||
|
self.assertFalse(mock_auto_flow.called)
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_auto_priority_recovery_flow_skip_recovery(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
|
||||||
|
mock_auto_flow.return_value = FakeFlow
|
||||||
|
FakeFlow.run = mock.Mock(
|
||||||
|
side_effect=exception.SkipHostRecoveryException)
|
||||||
|
|
||||||
|
self.assertRaises(
|
||||||
|
exception.SkipHostRecoveryException,
|
||||||
|
self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification,
|
||||||
|
reserved_host_list=['host-1', 'host-2'])
|
||||||
|
|
||||||
|
# Ensures that 'auto' flow executes but skip the host recovery
|
||||||
|
self.assertTrue(mock_auto_flow.called)
|
||||||
|
# Ensures that 'reserved_host' flow will not execute
|
||||||
|
self.assertFalse(mock_rh_flow.called)
|
||||||
|
|
||||||
|
@mock.patch.object(base, 'DynamicLogListener')
|
||||||
|
@mock.patch.object(host_failure, 'get_auto_flow')
|
||||||
|
@mock.patch.object(host_failure, 'get_rh_flow')
|
||||||
|
def test_rh_priority_recovery_flow_reserved_hosts_not_available(
|
||||||
|
self, mock_rh_flow, mock_auto_flow, mock_listener):
|
||||||
|
|
||||||
|
self.taskflow_driver.execute_host_failure(
|
||||||
|
self.ctxt, 'fake_host',
|
||||||
|
fields.FailoverSegmentRecoveryMethod.RH_PRIORITY,
|
||||||
|
uuidsentinel.fake_notification)
|
||||||
|
|
||||||
|
# Ensures that if there are no reserved_hosts for recovery
|
||||||
|
# 'reserved_host' flow will not execute
|
||||||
|
self.assertFalse(mock_rh_flow.called)
|
||||||
|
# Ensures that 'auto' flow executes as 'reserved_host' flow fails
|
||||||
|
self.assertTrue(mock_auto_flow.called)
|
|
@ -0,0 +1,12 @@
|
||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Implemented workflow for 'auto_priority' and 'rh_priority' recovery methods
|
||||||
|
in case of host failure recovery. Operators can set failover_segment's
|
||||||
|
recovery_method as 'auto_priority' and 'rh_priority' now. In case of
|
||||||
|
'auto_priority' the 'auto' workflow will be executed first to recover the
|
||||||
|
instances from failed compute host. If 'auto' workflow fails to recover
|
||||||
|
the instances then 'reserved_host' workflow will be tried. In case of
|
||||||
|
'rh_priority' the 'reserved_host' workflow will be executed first to
|
||||||
|
recover the instances from failed compute host. If 'reserved_host' workflow
|
||||||
|
fails to recover the instances then 'auto' workflow will be tried.
|
Loading…
Reference in New Issue