Set "disabled reason" for compute service.
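Masakari never set a reason when it disabled a compute service. A "service_disable_reason" config option was added for both host failure and process failure, and its value is now passed to Nova as the "disabled reason". Closes-Bug: 1936181 Change-Id: I998f7884195b93927773c7186d61c13670a53662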
This commit is contained in:
parent
f2e830f927
commit
c861437b52
@ -27,6 +27,11 @@ host_recovery_group = cfg.OptGroup(
|
|||||||
title='Host failure recovery options',
|
title='Host failure recovery options',
|
||||||
help="Configuration options for host failure recovery")
|
help="Configuration options for host failure recovery")
|
||||||
|
|
||||||
|
process_recovery_group = cfg.OptGroup(
|
||||||
|
'process_failure',
|
||||||
|
title='Process failure recovery options',
|
||||||
|
help="Configuration options for process failure recovery")
|
||||||
|
|
||||||
customized_recovery_flow_group = cfg.OptGroup(
|
customized_recovery_flow_group = cfg.OptGroup(
|
||||||
'taskflow_driver_recovery_flows',
|
'taskflow_driver_recovery_flows',
|
||||||
title='Customized recovery flow Options',
|
title='Customized recovery flow Options',
|
||||||
@ -80,6 +85,10 @@ Operators can decide whether reserved_host should be added to aggregate group
|
|||||||
of failed compute host. When set to True, reserved host will be added to the
|
of failed compute host. When set to True, reserved host will be added to the
|
||||||
aggregate group of failed compute host. When set to False, the reserved_host
|
aggregate group of failed compute host. When set to False, the reserved_host
|
||||||
will not be added to the aggregate group of failed compute host."""),
|
will not be added to the aggregate group of failed compute host."""),
|
||||||
|
cfg.StrOpt("service_disable_reason",
|
||||||
|
default="Masakari detected host failed.",
|
||||||
|
help="Compute disable reason in case Masakari detects host "
|
||||||
|
"failure."),
|
||||||
]
|
]
|
||||||
|
|
||||||
instance_failure_options = [
|
instance_failure_options = [
|
||||||
@ -220,14 +229,23 @@ The allowed values for this option is comma separated dictionary of object
|
|||||||
names in between ``{`` and ``}``."""))
|
names in between ``{`` and ``}``."""))
|
||||||
]
|
]
|
||||||
|
|
||||||
|
process_failure_opts = [
|
||||||
|
cfg.StrOpt("service_disable_reason",
|
||||||
|
default="Masakari detected process failed.",
|
||||||
|
help="Compute disable reason in case Masakari detects process "
|
||||||
|
"failure."),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def register_opts(conf):
|
def register_opts(conf):
|
||||||
conf.register_group(instance_recovery_group)
|
conf.register_group(instance_recovery_group)
|
||||||
conf.register_group(host_recovery_group)
|
conf.register_group(host_recovery_group)
|
||||||
|
conf.register_group(process_recovery_group)
|
||||||
conf.register_group(customized_recovery_flow_group)
|
conf.register_group(customized_recovery_flow_group)
|
||||||
conf.register_group(taskflow_group)
|
conf.register_group(taskflow_group)
|
||||||
conf.register_opts(instance_failure_options, group=instance_recovery_group)
|
conf.register_opts(instance_failure_options, group=instance_recovery_group)
|
||||||
conf.register_opts(host_failure_opts, group=host_recovery_group)
|
conf.register_opts(host_failure_opts, group=host_recovery_group)
|
||||||
|
conf.register_opts(process_failure_opts, group=process_recovery_group)
|
||||||
conf.register_opts(taskflow_driver_recovery_flows,
|
conf.register_opts(taskflow_driver_recovery_flows,
|
||||||
group=customized_recovery_flow_group)
|
group=customized_recovery_flow_group)
|
||||||
conf.register_opts(taskflow_options, group=taskflow_group)
|
conf.register_opts(taskflow_options, group=taskflow_group)
|
||||||
@ -237,6 +255,7 @@ def list_opts():
|
|||||||
return {
|
return {
|
||||||
instance_recovery_group.name: instance_failure_options,
|
instance_recovery_group.name: instance_failure_options,
|
||||||
host_recovery_group.name: host_failure_opts,
|
host_recovery_group.name: host_failure_opts,
|
||||||
|
process_recovery_group.name: process_failure_opts,
|
||||||
taskflow_group.name: taskflow_options
|
taskflow_group.name: taskflow_options
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +48,8 @@ class DisableComputeServiceTask(base.MasakariTask):
|
|||||||
def execute(self, host_name):
|
def execute(self, host_name):
|
||||||
msg = "Disabling compute service on host: '%s'" % host_name
|
msg = "Disabling compute service on host: '%s'" % host_name
|
||||||
self.update_details(msg)
|
self.update_details(msg)
|
||||||
self.novaclient.enable_disable_service(self.context, host_name)
|
self.novaclient.enable_disable_service(self.context, host_name,
|
||||||
|
reason=CONF.host_failure.service_disable_reason)
|
||||||
# Sleep until nova-compute service is marked as disabled.
|
# Sleep until nova-compute service is marked as disabled.
|
||||||
log_msg = ("Sleeping %(wait)s sec before starting recovery "
|
log_msg = ("Sleeping %(wait)s sec before starting recovery "
|
||||||
"thread until nova recognizes the node down.")
|
"thread until nova recognizes the node down.")
|
||||||
|
@ -45,7 +45,8 @@ class DisableComputeNodeTask(base.MasakariTask):
|
|||||||
if not self.novaclient.is_service_disabled(self.context, host_name,
|
if not self.novaclient.is_service_disabled(self.context, host_name,
|
||||||
process_name):
|
process_name):
|
||||||
# disable compute node on given host
|
# disable compute node on given host
|
||||||
self.novaclient.enable_disable_service(self.context, host_name)
|
self.novaclient.enable_disable_service(self.context, host_name,
|
||||||
|
reason=CONF.process_failure.service_disable_reason)
|
||||||
msg = "Disabled compute service on host: '%s'" % host_name
|
msg = "Disabled compute service on host: '%s'" % host_name
|
||||||
self.update_details(msg, 1.0)
|
self.update_details(msg, 1.0)
|
||||||
else:
|
else:
|
||||||
|
@ -52,6 +52,7 @@ class HostFailureTestCase(test.TestCase):
|
|||||||
self.instance_host = "fake-host"
|
self.instance_host = "fake-host"
|
||||||
self.novaclient = nova.API()
|
self.novaclient = nova.API()
|
||||||
self.fake_client = fakes.FakeNovaClient()
|
self.fake_client = fakes.FakeNovaClient()
|
||||||
|
self.disabled_reason = CONF.host_failure.service_disable_reason
|
||||||
|
|
||||||
def _verify_instance_evacuated(self, old_instance_list):
|
def _verify_instance_evacuated(self, old_instance_list):
|
||||||
for server in old_instance_list:
|
for server in old_instance_list:
|
||||||
@ -86,7 +87,7 @@ class HostFailureTestCase(test.TestCase):
|
|||||||
task.execute(self.instance_host)
|
task.execute(self.instance_host)
|
||||||
|
|
||||||
mock_enable_disable.assert_called_once_with(
|
mock_enable_disable.assert_called_once_with(
|
||||||
self.ctxt, self.instance_host)
|
self.ctxt, self.instance_host, reason=self.disabled_reason)
|
||||||
|
|
||||||
def _test_instance_list(self, instances_evacuation_count):
|
def _test_instance_list(self, instances_evacuation_count):
|
||||||
task = host_failure.PrepareHAEnabledInstancesTask(self.ctxt,
|
task = host_failure.PrepareHAEnabledInstancesTask(self.ctxt,
|
||||||
|
@ -20,12 +20,15 @@ Unit Tests for process failure TaskFlow
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from masakari.compute import nova
|
from masakari.compute import nova
|
||||||
|
from masakari import conf
|
||||||
from masakari import context
|
from masakari import context
|
||||||
from masakari.engine.drivers.taskflow import process_failure
|
from masakari.engine.drivers.taskflow import process_failure
|
||||||
from masakari import exception
|
from masakari import exception
|
||||||
from masakari import test
|
from masakari import test
|
||||||
from masakari.tests.unit import fakes
|
from masakari.tests.unit import fakes
|
||||||
|
|
||||||
|
CONF = conf.CONF
|
||||||
|
|
||||||
|
|
||||||
class ProcessFailureTestCase(test.TestCase):
|
class ProcessFailureTestCase(test.TestCase):
|
||||||
|
|
||||||
@ -39,6 +42,7 @@ class ProcessFailureTestCase(test.TestCase):
|
|||||||
# overriding 'wait_period_after_service_update' to 2 seconds
|
# overriding 'wait_period_after_service_update' to 2 seconds
|
||||||
# to reduce the wait period.
|
# to reduce the wait period.
|
||||||
self.override_config('wait_period_after_service_update', 2)
|
self.override_config('wait_period_after_service_update', 2)
|
||||||
|
self.disabled_reason = CONF.process_failure.service_disable_reason
|
||||||
|
|
||||||
@mock.patch('masakari.compute.nova.novaclient')
|
@mock.patch('masakari.compute.nova.novaclient')
|
||||||
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
||||||
|
@ -172,6 +172,13 @@ class FakeNovaClient(object):
|
|||||||
services.append(service)
|
services.append(service)
|
||||||
return services
|
return services
|
||||||
|
|
||||||
|
def disable_log_reason(self, service_id, reason):
|
||||||
|
for _service in self._services:
|
||||||
|
if _service.id == service_id:
|
||||||
|
service = _service
|
||||||
|
service.status = 'disabled'
|
||||||
|
service.disabled_reason = reason
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.servers = FakeNovaClient.ServerManager()
|
self.servers = FakeNovaClient.ServerManager()
|
||||||
self.services = FakeNovaClient.Services()
|
self.services = FakeNovaClient.Services()
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Nova compute service "disabled reason" is now set
|
||||||
|
in case of host or process failure.
|
||||||
|
It can be customised per type of failure via the ``service_disable_reason`` option in the ``[host_failure]`` and ``[process_failure]`` config sections.
|
Loading…
Reference in New Issue
Block a user