Set "disabled reason" for compute service.
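Masakari never set the reason why a compute service was disabled. A configurable "disabled reason" (the service_disable_reason option, available separately for host failure and process failure) was added to the config and is now passed when the service is disabled. Closes-Bug: 1936181 Change-Id: I998f7884195b93927773c7186d61c13670a53662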
This commit is contained in:
parent
f2e830f927
commit
c861437b52
@ -27,6 +27,11 @@ host_recovery_group = cfg.OptGroup(
|
||||
title='Host failure recovery options',
|
||||
help="Configuration options for host failure recovery")
|
||||
|
||||
# Option group for the ``process_failure`` config section; the process
# failure recovery options are registered under it.
process_recovery_group = cfg.OptGroup(
    'process_failure',
    title='Process failure recovery options',
    help="Configuration options for process failure recovery",
)
|
||||
|
||||
customized_recovery_flow_group = cfg.OptGroup(
|
||||
'taskflow_driver_recovery_flows',
|
||||
title='Customized recovery flow Options',
|
||||
@ -80,6 +85,10 @@ Operators can decide whether reserved_host should be added to aggregate group
|
||||
of failed compute host. When set to True, reserved host will be added to the
|
||||
aggregate group of failed compute host. When set to False, the reserved_host
|
||||
will not be added to the aggregate group of failed compute host."""),
|
||||
cfg.StrOpt("service_disable_reason",
|
||||
default="Masakari detected host failed.",
|
||||
help="Compute disable reason in case Masakari detects host "
|
||||
"failure."),
|
||||
]
|
||||
|
||||
instance_failure_options = [
|
||||
@ -220,14 +229,23 @@ The allowed values for this option is comma separated dictionary of object
|
||||
names in between ``{`` and ``}``."""))
|
||||
]
|
||||
|
||||
# Options for the process-failure recovery group. The reason string is read
# as ``CONF.process_failure.service_disable_reason`` when the compute
# service is disabled after a process failure.
process_failure_opts = [
    cfg.StrOpt(
        "service_disable_reason",
        default="Masakari detected process failed.",
        help=("Compute disable reason in case Masakari detects process "
              "failure."),
    ),
]
|
||||
|
||||
|
||||
def register_opts(conf):
    """Register every recovery option group and its options on ``conf``.

    All groups are registered first, then each group's option list is
    registered against its group, in the same order.

    :param conf: configuration object exposing ``register_group`` and
        ``register_opts``.
    """
    group_options = (
        (instance_recovery_group, instance_failure_options),
        (host_recovery_group, host_failure_opts),
        (process_recovery_group, process_failure_opts),
        (customized_recovery_flow_group, taskflow_driver_recovery_flows),
        (taskflow_group, taskflow_options),
    )

    for opt_group, _unused in group_options:
        conf.register_group(opt_group)

    for opt_group, options in group_options:
        conf.register_opts(options, group=opt_group)
|
||||
@ -237,6 +255,7 @@ def list_opts():
|
||||
return {
|
||||
instance_recovery_group.name: instance_failure_options,
|
||||
host_recovery_group.name: host_failure_opts,
|
||||
process_recovery_group.name: process_failure_opts,
|
||||
taskflow_group.name: taskflow_options
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,8 @@ class DisableComputeServiceTask(base.MasakariTask):
|
||||
def execute(self, host_name):
|
||||
msg = "Disabling compute service on host: '%s'" % host_name
|
||||
self.update_details(msg)
|
||||
self.novaclient.enable_disable_service(self.context, host_name)
|
||||
self.novaclient.enable_disable_service(self.context, host_name,
|
||||
reason=CONF.host_failure.service_disable_reason)
|
||||
# Sleep until nova-compute service is marked as disabled.
|
||||
log_msg = ("Sleeping %(wait)s sec before starting recovery "
|
||||
"thread until nova recognizes the node down.")
|
||||
|
@ -45,7 +45,8 @@ class DisableComputeNodeTask(base.MasakariTask):
|
||||
if not self.novaclient.is_service_disabled(self.context, host_name,
|
||||
process_name):
|
||||
# disable compute node on given host
|
||||
self.novaclient.enable_disable_service(self.context, host_name)
|
||||
self.novaclient.enable_disable_service(self.context, host_name,
|
||||
reason=CONF.process_failure.service_disable_reason)
|
||||
msg = "Disabled compute service on host: '%s'" % host_name
|
||||
self.update_details(msg, 1.0)
|
||||
else:
|
||||
|
@ -52,6 +52,7 @@ class HostFailureTestCase(test.TestCase):
|
||||
self.instance_host = "fake-host"
|
||||
self.novaclient = nova.API()
|
||||
self.fake_client = fakes.FakeNovaClient()
|
||||
self.disabled_reason = CONF.host_failure.service_disable_reason
|
||||
|
||||
def _verify_instance_evacuated(self, old_instance_list):
|
||||
for server in old_instance_list:
|
||||
@ -86,7 +87,7 @@ class HostFailureTestCase(test.TestCase):
|
||||
task.execute(self.instance_host)
|
||||
|
||||
mock_enable_disable.assert_called_once_with(
|
||||
self.ctxt, self.instance_host)
|
||||
self.ctxt, self.instance_host, reason=self.disabled_reason)
|
||||
|
||||
def _test_instance_list(self, instances_evacuation_count):
|
||||
task = host_failure.PrepareHAEnabledInstancesTask(self.ctxt,
|
||||
|
@ -20,12 +20,15 @@ Unit Tests for process failure TaskFlow
|
||||
from unittest import mock
|
||||
|
||||
from masakari.compute import nova
|
||||
from masakari import conf
|
||||
from masakari import context
|
||||
from masakari.engine.drivers.taskflow import process_failure
|
||||
from masakari import exception
|
||||
from masakari import test
|
||||
from masakari.tests.unit import fakes
|
||||
|
||||
CONF = conf.CONF
|
||||
|
||||
|
||||
class ProcessFailureTestCase(test.TestCase):
|
||||
|
||||
@ -39,6 +42,7 @@ class ProcessFailureTestCase(test.TestCase):
|
||||
# overriding 'wait_period_after_service_update' to 2 seconds
|
||||
# to reduce the wait period.
|
||||
self.override_config('wait_period_after_service_update', 2)
|
||||
self.disabled_reason = CONF.process_failure.service_disable_reason
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
||||
|
@ -172,6 +172,13 @@ class FakeNovaClient(object):
|
||||
services.append(service)
|
||||
return services
|
||||
|
||||
def disable_log_reason(self, service_id, reason):
    """Mark the matching fake service(s) as disabled and record the reason.

    Every entry in ``self._services`` whose ``id`` equals ``service_id``
    gets ``status`` set to ``'disabled'`` and ``disabled_reason`` set to
    ``reason``. If no entry matches, nothing is changed.

    :param service_id: id of the service to disable.
    :param reason: text stored in the service's ``disabled_reason``.
    """
    # NOTE(review): the flattened source does not show whether the two
    # assignments were nested under the ``if``; they are kept inside it so
    # that an unknown ``service_id`` never touches an unbound name.
    for service in self._services:
        if service.id == service_id:
            service.status = 'disabled'
            service.disabled_reason = reason
|
||||
|
||||
def __init__(self):
|
||||
self.servers = FakeNovaClient.ServerManager()
|
||||
self.services = FakeNovaClient.Services()
|
||||
|
@ -0,0 +1,6 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Nova compute service "disable reason" is now set
|
||||
in case of host or process failure.
|
||||
It can be customised per type of failure via the ``service_disable_reason`` option in the ``host_failure`` and ``process_failure`` config sections.
|
Loading…
Reference in New Issue
Block a user