From c861437b52ca0c415c4239dbf86da25877e0954e Mon Sep 17 00:00:00 2001 From: Mitya_Eremeev Date: Wed, 14 Jul 2021 12:11:01 +0300 Subject: [PATCH] Set "disabled reason" for compute service. Masakari never sets the reason why the compute service was disabled. A "disabled reason" option was added to the config. Closes-Bug: 1936181 Change-Id: I998f7884195b93927773c7186d61c13670a53662 --- masakari/conf/engine_driver.py | 19 +++++++++++++++++++ .../engine/drivers/taskflow/host_failure.py | 3 ++- .../drivers/taskflow/process_failure.py | 3 ++- .../taskflow/test_host_failure_flow.py | 3 ++- .../taskflow/test_process_failure_flow.py | 4 ++++ masakari/tests/unit/fakes.py | 7 +++++++ ...mpute-disable-reason-9570734c0bb888cf.yaml | 6 ++++++ 7 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/compute-disable-reason-9570734c0bb888cf.yaml diff --git a/masakari/conf/engine_driver.py b/masakari/conf/engine_driver.py index 0bc3bbdc..d053d574 100644 --- a/masakari/conf/engine_driver.py +++ b/masakari/conf/engine_driver.py @@ -27,6 +27,11 @@ host_recovery_group = cfg.OptGroup( title='Host failure recovery options', help="Configuration options for host failure recovery") +process_recovery_group = cfg.OptGroup( + 'process_failure', + title='Process failure recovery options', + help="Configuration options for process failure recovery") + customized_recovery_flow_group = cfg.OptGroup( 'taskflow_driver_recovery_flows', title='Customized recovery flow Options', @@ -80,6 +85,10 @@ Operators can decide whether reserved_host should be added to aggregate group of failed compute host. When set to True, reserved host will be added to the aggregate group of failed compute host. 
When set to False, the reserved_host will not be added to the aggregate group of failed compute host."""), + cfg.StrOpt("service_disable_reason", + default="Masakari detected host failed.", + help="Compute disable reason in case Masakari detects host " + "failure."), ] instance_failure_options = [ @@ -220,14 +229,23 @@ The allowed values for this option is comma separated dictionary of object names in between ``{`` and ``}``.""")) ] +process_failure_opts = [ + cfg.StrOpt("service_disable_reason", + default="Masakari detected process failed.", + help="Compute disable reason in case Masakari detects process " + "failure."), +] + def register_opts(conf): conf.register_group(instance_recovery_group) conf.register_group(host_recovery_group) + conf.register_group(process_recovery_group) conf.register_group(customized_recovery_flow_group) conf.register_group(taskflow_group) conf.register_opts(instance_failure_options, group=instance_recovery_group) conf.register_opts(host_failure_opts, group=host_recovery_group) + conf.register_opts(process_failure_opts, group=process_recovery_group) conf.register_opts(taskflow_driver_recovery_flows, group=customized_recovery_flow_group) conf.register_opts(taskflow_options, group=taskflow_group) @@ -237,6 +255,7 @@ def list_opts(): return { instance_recovery_group.name: instance_failure_options, host_recovery_group.name: host_failure_opts, + process_recovery_group.name: process_failure_opts, taskflow_group.name: taskflow_options } diff --git a/masakari/engine/drivers/taskflow/host_failure.py b/masakari/engine/drivers/taskflow/host_failure.py index a70015cc..109f0eef 100644 --- a/masakari/engine/drivers/taskflow/host_failure.py +++ b/masakari/engine/drivers/taskflow/host_failure.py @@ -48,7 +48,8 @@ class DisableComputeServiceTask(base.MasakariTask): def execute(self, host_name): msg = "Disabling compute service on host: '%s'" % host_name self.update_details(msg) - self.novaclient.enable_disable_service(self.context, host_name) + 
self.novaclient.enable_disable_service(self.context, host_name, + reason=CONF.host_failure.service_disable_reason) # Sleep until nova-compute service is marked as disabled. log_msg = ("Sleeping %(wait)s sec before starting recovery " "thread until nova recognizes the node down.") diff --git a/masakari/engine/drivers/taskflow/process_failure.py b/masakari/engine/drivers/taskflow/process_failure.py index 12237588..d8d83f9c 100644 --- a/masakari/engine/drivers/taskflow/process_failure.py +++ b/masakari/engine/drivers/taskflow/process_failure.py @@ -45,7 +45,8 @@ class DisableComputeNodeTask(base.MasakariTask): if not self.novaclient.is_service_disabled(self.context, host_name, process_name): # disable compute node on given host - self.novaclient.enable_disable_service(self.context, host_name) + self.novaclient.enable_disable_service(self.context, host_name, + reason=CONF.process_failure.service_disable_reason) msg = "Disabled compute service on host: '%s'" % host_name self.update_details(msg, 1.0) else: diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py index 9428e21f..bce6c379 100644 --- a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py +++ b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py @@ -52,6 +52,7 @@ class HostFailureTestCase(test.TestCase): self.instance_host = "fake-host" self.novaclient = nova.API() self.fake_client = fakes.FakeNovaClient() + self.disabled_reason = CONF.host_failure.service_disable_reason def _verify_instance_evacuated(self, old_instance_list): for server in old_instance_list: @@ -86,7 +87,7 @@ class HostFailureTestCase(test.TestCase): task.execute(self.instance_host) mock_enable_disable.assert_called_once_with( - self.ctxt, self.instance_host) + self.ctxt, self.instance_host, reason=self.disabled_reason) def _test_instance_list(self, instances_evacuation_count): task = 
host_failure.PrepareHAEnabledInstancesTask(self.ctxt, diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_process_failure_flow.py b/masakari/tests/unit/engine/drivers/taskflow/test_process_failure_flow.py index f8bbee54..3f27ed8b 100644 --- a/masakari/tests/unit/engine/drivers/taskflow/test_process_failure_flow.py +++ b/masakari/tests/unit/engine/drivers/taskflow/test_process_failure_flow.py @@ -20,12 +20,15 @@ Unit Tests for process failure TaskFlow from unittest import mock from masakari.compute import nova +from masakari import conf from masakari import context from masakari.engine.drivers.taskflow import process_failure from masakari import exception from masakari import test from masakari.tests.unit import fakes +CONF = conf.CONF + class ProcessFailureTestCase(test.TestCase): @@ -39,6 +42,7 @@ class ProcessFailureTestCase(test.TestCase): # overriding 'wait_period_after_service_update' to 2 seconds # to reduce the wait period. self.override_config('wait_period_after_service_update', 2) + self.disabled_reason = CONF.process_failure.service_disable_reason @mock.patch('masakari.compute.nova.novaclient') @mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.' 
diff --git a/masakari/tests/unit/fakes.py b/masakari/tests/unit/fakes.py index 227af1e8..8f6fb771 100644 --- a/masakari/tests/unit/fakes.py +++ b/masakari/tests/unit/fakes.py @@ -172,6 +172,13 @@ class FakeNovaClient(object): services.append(service) return services + def disable_log_reason(self, service_id, reason): + for _service in self._services: + if _service.id == service_id: + service = _service + service.status = 'disabled' + service.disabled_reason = reason + def __init__(self): self.servers = FakeNovaClient.ServerManager() self.services = FakeNovaClient.Services() diff --git a/releasenotes/notes/compute-disable-reason-9570734c0bb888cf.yaml b/releasenotes/notes/compute-disable-reason-9570734c0bb888cf.yaml new file mode 100644 index 00000000..b84ed8ad --- /dev/null +++ b/releasenotes/notes/compute-disable-reason-9570734c0bb888cf.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + Nova compute service "disable reason" is now set + in case of host or process failure. + It can be customised per type of failure via config.