diff --git a/masakari/conf/engine_driver.py b/masakari/conf/engine_driver.py index 12b9f3b4..0bc3bbdc 100644 --- a/masakari/conf/engine_driver.py +++ b/masakari/conf/engine_driver.py @@ -43,13 +43,26 @@ host_failure_opts = [ cfg.BoolOpt('evacuate_all_instances', default=True, help=""" -Operators can decide whether all instances or only those instances which -contain metadata key 'HA_Enabled=True' should be allowed for evacuation from -a failed source compute node. When set to True, it will evacuate all instances -from a failed source compute node. First preference will be given to those -instances which contain 'HA_Enabled=True' metadata key, and then it will -evacuate the remaining ones. When set to False, it will evacuate only those -instances which contain 'HA_Enabled=True' metadata key."""), +Operators can decide whether all instances or only those instances which have +``[host_failure]\\ha_enabled_instance_metadata_key`` set to ``True`` should be +allowed for evacuation from a failed source compute node. +When set to True, it will evacuate all instances from a failed source compute +node. +First preference will be given to those instances which have +``[host_failure]\\ha_enabled_instance_metadata_key`` set to ``True``, +and then it will evacuate the remaining ones. +When set to False, it will evacuate only those instances which have +``[host_failure]\\ha_enabled_instance_metadata_key`` set to ``True``. + """), + + cfg.StrOpt('ha_enabled_instance_metadata_key', + default='HA_Enabled', + help=""" +Operators can decide on the instance metadata key naming that affects the +per-instance behaviour of ``[host_failure]\\evacuate_all_instances``. +The default is the same for both failure types (host, instance) but the value +can be overridden to make the metadata key different per failure type. + """), cfg.BoolOpt('ignore_instances_in_error_state', default=False, @@ -74,12 +87,24 @@ instance_failure_options = [ default=False, help=""" Operators can decide whether all instances or only those instances which -contain metadata key 'HA_Enabled=True' should be taken into account to -recover from instance failure events. When set to True, it will execute -instance failure recovery actions for an instance irrespective of whether -that particular instance contains metadata key 'HA_Enabled=True' or not. +have ``[instance_failure]\\ha_enabled_instance_metadata_key`` set to ``True`` +should be taken into account to recover from instance failure events. +When set to True, it will execute instance failure recovery actions for an +instance irrespective of whether that particular instance has +``[instance_failure]\\ha_enabled_instance_metadata_key`` set to ``True``. When set to False, it will only execute instance failure recovery actions -for an instance which contain metadata key 'HA_Enabled=True'."""), +for an instance which has +``[instance_failure]\\ha_enabled_instance_metadata_key`` set to ``True``. + """), + + cfg.StrOpt('ha_enabled_instance_metadata_key', + default='HA_Enabled', + help=""" +Operators can decide on the instance metadata key naming that affects the +per-instance behaviour of ``[instance_failure]\\process_all_instances``. +The default is the same for both failure types (host, instance) but the value +can be overridden to make the metadata key different per failure type. + """), ] taskflow_options = [ diff --git a/masakari/engine/drivers/taskflow/host_failure.py b/masakari/engine/drivers/taskflow/host_failure.py index a9f6427e..ddcc5c87 100644 --- a/masakari/engine/drivers/taskflow/host_failure.py +++ b/masakari/engine/drivers/taskflow/host_failure.py @@ -73,9 +73,11 @@ class PrepareHAEnabledInstancesTask(base.MasakariTask): ha_enabled_instances = [] non_ha_enabled_instances = [] + ha_enabled_key = CONF.host_failure.ha_enabled_instance_metadata_key + for instance in instance_list: is_instance_ha_enabled = strutils.bool_from_string( - instance.metadata.get('HA_Enabled', False)) + instance.metadata.get(ha_enabled_key, False)) if CONF.host_failure.ignore_instances_in_error_state and ( getattr(instance, "OS-EXT-STS:vm_state") == "error"): if is_instance_ha_enabled: diff --git a/masakari/engine/drivers/taskflow/instance_failure.py b/masakari/engine/drivers/taskflow/instance_failure.py index 3b9276cb..692bd9d4 100644 --- a/masakari/engine/drivers/taskflow/instance_failure.py +++ b/masakari/engine/drivers/taskflow/instance_failure.py @@ -43,12 +43,14 @@ class StopInstanceTask(base.MasakariTask): """Stop the instance for recovery.""" instance = self.novaclient.get_server(self.context, instance_uuid) + ha_enabled_key = CONF.instance_failure.ha_enabled_instance_metadata_key + # If an instance is not HA_Enabled and "process_all_instances" config # option is also disabled, then there is no need to take any recovery # action. if not CONF.instance_failure.process_all_instances and not ( strutils.bool_from_string( - instance.metadata.get('HA_Enabled', False))): + instance.metadata.get(ha_enabled_key, False))): msg = ("Skipping recovery for instance: %(instance_uuid)s as it is" " not Ha_Enabled") % {'instance_uuid': instance_uuid} LOG.info(msg) diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py index f63bb1ec..5f70ede3 100644 --- a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py +++ b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py @@ -99,7 +99,9 @@ class HostFailureTestCase(test.TestCase): self.assertNotEqual("error", getattr(instance, "OS-EXT-STS:vm_state")) if not CONF.host_failure.evacuate_all_instances: - self.assertTrue(instance.metadata.get('HA_Enabled', False)) + ha_enabled_key = (CONF.host_failure + .ha_enabled_instance_metadata_key) + self.assertTrue(instance.metadata.get(ha_enabled_key, False)) instance_uuid_list.append(instance.id) @@ -207,6 +209,54 @@ class HostFailureTestCase(test.TestCase): mock.call('Evacuation process completed!', 1.0) ]) + @mock.patch('masakari.compute.nova.novaclient') + @mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.' + 'update_details') + def test_host_failure_flow_for_auto_recovery_custom_ha_key( + self, _mock_notify, _mock_novaclient, mock_unlock, mock_lock, + mock_enable_disable): + _mock_novaclient.return_value = self.fake_client + self.override_config("evacuate_all_instances", + False, "host_failure") + + ha_enabled_key = 'Ensure-My-HA' + + self.override_config('ha_enabled_instance_metadata_key', + ha_enabled_key, 'host_failure') + + # create test data + self.fake_client.servers.create(id="1", host=self.instance_host, + ha_enabled=True) + self.fake_client.servers.create(id="2", host=self.instance_host, + ha_enabled=True, + ha_enabled_key=ha_enabled_key) + + # execute DisableComputeServiceTask + self._test_disable_compute_service(mock_enable_disable) + + # execute PrepareHAEnabledInstancesTask + instance_list = self._test_instance_list(1) + + # execute EvacuateInstancesTask + self._evacuate_instances(instance_list, mock_enable_disable) + + # verify progress details + _mock_notify.assert_has_calls([ + mock.call("Disabling compute service on host: 'fake-host'"), + mock.call("Disabled compute service on host: 'fake-host'", 1.0), + mock.call('Preparing instances for evacuation'), + mock.call("Total instances running on failed host 'fake-host' is 2" + "", 0.3), + mock.call("Total HA Enabled instances count: '1'", 0.6), + mock.call("Instances to be evacuated are: '2'", 1.0), + mock.call("Start evacuation of instances from failed host " + "'fake-host', instance uuids are: '2'"), + mock.call("Evacuation of instance started: '2'", 0.5), + mock.call("Successfully evacuate instances '2' from host " + "'fake-host'", 0.7), + mock.call('Evacuation process completed!', 1.0) + ]) + @mock.patch('masakari.compute.nova.novaclient') @mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.' 'update_details') diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_instance_failure_flow.py b/masakari/tests/unit/engine/drivers/taskflow/test_instance_failure_flow.py index 5ae18618..3c2db18c 100644 --- a/masakari/tests/unit/engine/drivers/taskflow/test_instance_failure_flow.py +++ b/masakari/tests/unit/engine/drivers/taskflow/test_instance_failure_flow.py @@ -93,6 +93,46 @@ class InstanceFailureTestCase(test.TestCase): "' vm_state is ACTIVE", 1.0) ]) + @mock.patch('masakari.compute.nova.novaclient') + @mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.' + 'update_details') + def test_instance_failure_flow_custom_ha_key( + self, _mock_notify, _mock_novaclient): + _mock_novaclient.return_value = self.fake_client + + ha_enabled_key = 'Ensure-My-HA' + + self.override_config('ha_enabled_instance_metadata_key', + ha_enabled_key, 'instance_failure') + + # create test data with custom ha_enabled_key + self.fake_client.servers.create(self.instance_id, + host="fake-host", + ha_enabled=True, + ha_enabled_key=ha_enabled_key) + + # test StopInstanceTask + self._test_stop_instance() + + # test StartInstanceTask + task = instance_failure.StartInstanceTask(self.ctxt, self.novaclient) + task.execute(self.instance_id) + + # test ConfirmInstanceActiveTask + self._test_confirm_instance_is_active() + + # verify progress details + _mock_notify.assert_has_calls([ + mock.call('Stopping instance: ' + self.instance_id), + mock.call("Stopped instance: '" + self.instance_id + "'", 1.0), + mock.call("Starting instance: '" + self.instance_id + "'"), + mock.call("Instance started: '" + self.instance_id + "'", 1.0), + mock.call("Confirming instance '" + self.instance_id + + "' vm_state is ACTIVE"), + mock.call("Confirmed instance '" + self.instance_id + + "' vm_state is ACTIVE", 1.0) + ]) + @mock.patch('masakari.compute.nova.novaclient') @mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.' 'update_details') diff --git a/masakari/tests/unit/fakes.py b/masakari/tests/unit/fakes.py index 19a80a16..74c38576 100644 --- a/masakari/tests/unit/fakes.py +++ b/masakari/tests/unit/fakes.py @@ -41,7 +41,7 @@ class FakeNovaClient(object): class Server(object): def __init__(self, id=None, uuid=None, host=None, vm_state=None, task_state=None, power_state=1, ha_enabled=None, - locked=False): + ha_enabled_key='HA_Enabled', locked=False): self.id = id self.uuid = uuid or uuidutils.generate_uuid() self.host = host @@ -49,7 +49,7 @@ class FakeNovaClient(object): setattr(self, 'OS-EXT-STS:vm_state', vm_state) setattr(self, 'OS-EXT-STS:task_state', task_state) setattr(self, 'OS-EXT-STS:power_state', power_state) - self.metadata = {"HA_Enabled": ha_enabled} + self.metadata = {ha_enabled_key: ha_enabled} self.locked = locked class ServerManager(object): @@ -59,12 +59,14 @@ class FakeNovaClient(object): self.stop_calls = [] def create(self, id, uuid=None, host=None, vm_state='active', - task_state=None, power_state=1, ha_enabled=False): + task_state=None, power_state=1, ha_enabled=False, + ha_enabled_key='HA_Enabled'): server = FakeNovaClient.Server(id=id, uuid=uuid, host=host, vm_state=vm_state, task_state=task_state, power_state=power_state, - ha_enabled=ha_enabled) + ha_enabled=ha_enabled, + ha_enabled_key=ha_enabled_key) self._servers.append(server) return server diff --git a/releasenotes/notes/customisable-ha-enabled-instance-metadata-key-af511ea2aac96690.yaml b/releasenotes/notes/customisable-ha-enabled-instance-metadata-key-af511ea2aac96690.yaml new file mode 100644 index 00000000..dbaace65 --- /dev/null +++ b/releasenotes/notes/customisable-ha-enabled-instance-metadata-key-af511ea2aac96690.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Adds ``ha_enabled_instance_metadata_key`` config option to ``host_failure`` + and ``instance_failure`` config groups. This option allows operators to + override the default ``HA_Enabled`` instance metadata key which controls + the behaviour of Masakari towards the instance. This way one can have + different keys for different failure types (host vs instance failures).