Add ha_enabled_instance_metadata_key config option
Previously Masakari enforced a common instance metadata key that controlled how the HA protection applied to the instance: "HA_Enabled". Moreover, it was the same whether considering the instance or host failure. There are cases where users (operators) would like to independently configure it per failure class and in general customize its name. This patch implements the requested behaviour. New tests are included. Change-Id: I9dddf784da422f239ec304a2c0b8f24625914fc5 Implements: blueprint customisable-ha-enabled-instance-metadata-key
This commit is contained in:
parent
9492987c31
commit
4397088da7
@ -43,13 +43,26 @@ host_failure_opts = [
|
|||||||
cfg.BoolOpt('evacuate_all_instances',
|
cfg.BoolOpt('evacuate_all_instances',
|
||||||
default=True,
|
default=True,
|
||||||
help="""
|
help="""
|
||||||
Operators can decide whether all instances or only those instances which
|
Operators can decide whether all instances or only those instances which have
|
||||||
contain metadata key 'HA_Enabled=True' should be allowed for evacuation from
|
``[host_failure]\\ha_enabled_instance_metadata_key`` set to ``True`` should be
|
||||||
a failed source compute node. When set to True, it will evacuate all instances
|
allowed for evacuation from a failed source compute node.
|
||||||
from a failed source compute node. First preference will be given to those
|
When set to True, it will evacuate all instances from a failed source compute
|
||||||
instances which contain 'HA_Enabled=True' metadata key, and then it will
|
node.
|
||||||
evacuate the remaining ones. When set to False, it will evacuate only those
|
First preference will be given to those instances which have
|
||||||
instances which contain 'HA_Enabled=True' metadata key."""),
|
``[host_failure]\\ha_enabled_instance_metadata_key`` set to ``True``,
|
||||||
|
and then it will evacuate the remaining ones.
|
||||||
|
When set to False, it will evacuate only those instances which have
|
||||||
|
``[host_failure]\\ha_enabled_instance_metadata_key`` set to ``True``.
|
||||||
|
"""),
|
||||||
|
|
||||||
|
cfg.StrOpt('ha_enabled_instance_metadata_key',
|
||||||
|
default='HA_Enabled',
|
||||||
|
help="""
|
||||||
|
Operators can decide on the instance metadata key naming that affects the
|
||||||
|
per-instance behaviour of ``[host_failure]\\evacuate_all_instances``.
|
||||||
|
The default is the same for both failure types (host, instance) but the value
|
||||||
|
can be overridden to make the metadata key different per failure type.
|
||||||
|
"""),
|
||||||
|
|
||||||
cfg.BoolOpt('ignore_instances_in_error_state',
|
cfg.BoolOpt('ignore_instances_in_error_state',
|
||||||
default=False,
|
default=False,
|
||||||
@ -74,12 +87,24 @@ instance_failure_options = [
|
|||||||
default=False,
|
default=False,
|
||||||
help="""
|
help="""
|
||||||
Operators can decide whether all instances or only those instances which
|
Operators can decide whether all instances or only those instances which
|
||||||
contain metadata key 'HA_Enabled=True' should be taken into account to
|
have ``[instance_failure]\\ha_enabled_instance_metadata_key`` set to ``True``
|
||||||
recover from instance failure events. When set to True, it will execute
|
should be taken into account to recover from instance failure events.
|
||||||
instance failure recovery actions for an instance irrespective of whether
|
When set to True, it will execute instance failure recovery actions for an
|
||||||
that particular instance contains metadata key 'HA_Enabled=True' or not.
|
instance irrespective of whether that particular instance has
|
||||||
|
``[instance_failure]\\ha_enabled_instance_metadata_key`` set to ``True``.
|
||||||
When set to False, it will only execute instance failure recovery actions
|
When set to False, it will only execute instance failure recovery actions
|
||||||
for an instance which contain metadata key 'HA_Enabled=True'."""),
|
for an instance which has
|
||||||
|
``[instance_failure]\\ha_enabled_instance_metadata_key`` set to ``True``.
|
||||||
|
"""),
|
||||||
|
|
||||||
|
cfg.StrOpt('ha_enabled_instance_metadata_key',
|
||||||
|
default='HA_Enabled',
|
||||||
|
help="""
|
||||||
|
Operators can decide on the instance metadata key naming that affects the
|
||||||
|
per-instance behaviour of ``[instance_failure]\\process_all_instances``.
|
||||||
|
The default is the same for both failure types (host, instance) but the value
|
||||||
|
can be overridden to make the metadata key different per failure type.
|
||||||
|
"""),
|
||||||
]
|
]
|
||||||
|
|
||||||
taskflow_options = [
|
taskflow_options = [
|
||||||
|
@ -73,9 +73,11 @@ class PrepareHAEnabledInstancesTask(base.MasakariTask):
|
|||||||
ha_enabled_instances = []
|
ha_enabled_instances = []
|
||||||
non_ha_enabled_instances = []
|
non_ha_enabled_instances = []
|
||||||
|
|
||||||
|
ha_enabled_key = CONF.host_failure.ha_enabled_instance_metadata_key
|
||||||
|
|
||||||
for instance in instance_list:
|
for instance in instance_list:
|
||||||
is_instance_ha_enabled = strutils.bool_from_string(
|
is_instance_ha_enabled = strutils.bool_from_string(
|
||||||
instance.metadata.get('HA_Enabled', False))
|
instance.metadata.get(ha_enabled_key, False))
|
||||||
if CONF.host_failure.ignore_instances_in_error_state and (
|
if CONF.host_failure.ignore_instances_in_error_state and (
|
||||||
getattr(instance, "OS-EXT-STS:vm_state") == "error"):
|
getattr(instance, "OS-EXT-STS:vm_state") == "error"):
|
||||||
if is_instance_ha_enabled:
|
if is_instance_ha_enabled:
|
||||||
|
@ -43,12 +43,14 @@ class StopInstanceTask(base.MasakariTask):
|
|||||||
"""Stop the instance for recovery."""
|
"""Stop the instance for recovery."""
|
||||||
instance = self.novaclient.get_server(self.context, instance_uuid)
|
instance = self.novaclient.get_server(self.context, instance_uuid)
|
||||||
|
|
||||||
|
ha_enabled_key = CONF.instance_failure.ha_enabled_instance_metadata_key
|
||||||
|
|
||||||
# If an instance is not HA_Enabled and "process_all_instances" config
|
# If an instance is not HA_Enabled and "process_all_instances" config
|
||||||
# option is also disabled, then there is no need to take any recovery
|
# option is also disabled, then there is no need to take any recovery
|
||||||
# action.
|
# action.
|
||||||
if not CONF.instance_failure.process_all_instances and not (
|
if not CONF.instance_failure.process_all_instances and not (
|
||||||
strutils.bool_from_string(
|
strutils.bool_from_string(
|
||||||
instance.metadata.get('HA_Enabled', False))):
|
instance.metadata.get(ha_enabled_key, False))):
|
||||||
msg = ("Skipping recovery for instance: %(instance_uuid)s as it is"
|
msg = ("Skipping recovery for instance: %(instance_uuid)s as it is"
|
||||||
" not Ha_Enabled") % {'instance_uuid': instance_uuid}
|
" not Ha_Enabled") % {'instance_uuid': instance_uuid}
|
||||||
LOG.info(msg)
|
LOG.info(msg)
|
||||||
|
@ -99,7 +99,9 @@ class HostFailureTestCase(test.TestCase):
|
|||||||
self.assertNotEqual("error",
|
self.assertNotEqual("error",
|
||||||
getattr(instance, "OS-EXT-STS:vm_state"))
|
getattr(instance, "OS-EXT-STS:vm_state"))
|
||||||
if not CONF.host_failure.evacuate_all_instances:
|
if not CONF.host_failure.evacuate_all_instances:
|
||||||
self.assertTrue(instance.metadata.get('HA_Enabled', False))
|
ha_enabled_key = (CONF.host_failure
|
||||||
|
.ha_enabled_instance_metadata_key)
|
||||||
|
self.assertTrue(instance.metadata.get(ha_enabled_key, False))
|
||||||
|
|
||||||
instance_uuid_list.append(instance.id)
|
instance_uuid_list.append(instance.id)
|
||||||
|
|
||||||
@ -207,6 +209,54 @@ class HostFailureTestCase(test.TestCase):
|
|||||||
mock.call('Evacuation process completed!', 1.0)
|
mock.call('Evacuation process completed!', 1.0)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@mock.patch('masakari.compute.nova.novaclient')
|
||||||
|
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
||||||
|
'update_details')
|
||||||
|
def test_host_failure_flow_for_auto_recovery_custom_ha_key(
|
||||||
|
self, _mock_notify, _mock_novaclient, mock_unlock, mock_lock,
|
||||||
|
mock_enable_disable):
|
||||||
|
_mock_novaclient.return_value = self.fake_client
|
||||||
|
self.override_config("evacuate_all_instances",
|
||||||
|
False, "host_failure")
|
||||||
|
|
||||||
|
ha_enabled_key = 'Ensure-My-HA'
|
||||||
|
|
||||||
|
self.override_config('ha_enabled_instance_metadata_key',
|
||||||
|
ha_enabled_key, 'host_failure')
|
||||||
|
|
||||||
|
# create test data
|
||||||
|
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||||
|
ha_enabled=True)
|
||||||
|
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||||
|
ha_enabled=True,
|
||||||
|
ha_enabled_key=ha_enabled_key)
|
||||||
|
|
||||||
|
# execute DisableComputeServiceTask
|
||||||
|
self._test_disable_compute_service(mock_enable_disable)
|
||||||
|
|
||||||
|
# execute PrepareHAEnabledInstancesTask
|
||||||
|
instance_list = self._test_instance_list(1)
|
||||||
|
|
||||||
|
# execute EvacuateInstancesTask
|
||||||
|
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||||
|
|
||||||
|
# verify progress details
|
||||||
|
_mock_notify.assert_has_calls([
|
||||||
|
mock.call("Disabling compute service on host: 'fake-host'"),
|
||||||
|
mock.call("Disabled compute service on host: 'fake-host'", 1.0),
|
||||||
|
mock.call('Preparing instances for evacuation'),
|
||||||
|
mock.call("Total instances running on failed host 'fake-host' is 2"
|
||||||
|
"", 0.3),
|
||||||
|
mock.call("Total HA Enabled instances count: '1'", 0.6),
|
||||||
|
mock.call("Instances to be evacuated are: '2'", 1.0),
|
||||||
|
mock.call("Start evacuation of instances from failed host "
|
||||||
|
"'fake-host', instance uuids are: '2'"),
|
||||||
|
mock.call("Evacuation of instance started: '2'", 0.5),
|
||||||
|
mock.call("Successfully evacuate instances '2' from host "
|
||||||
|
"'fake-host'", 0.7),
|
||||||
|
mock.call('Evacuation process completed!', 1.0)
|
||||||
|
])
|
||||||
|
|
||||||
@mock.patch('masakari.compute.nova.novaclient')
|
@mock.patch('masakari.compute.nova.novaclient')
|
||||||
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
||||||
'update_details')
|
'update_details')
|
||||||
|
@ -93,6 +93,46 @@ class InstanceFailureTestCase(test.TestCase):
|
|||||||
"' vm_state is ACTIVE", 1.0)
|
"' vm_state is ACTIVE", 1.0)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@mock.patch('masakari.compute.nova.novaclient')
|
||||||
|
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
||||||
|
'update_details')
|
||||||
|
def test_instance_failure_flow_custom_ha_key(
|
||||||
|
self, _mock_notify, _mock_novaclient):
|
||||||
|
_mock_novaclient.return_value = self.fake_client
|
||||||
|
|
||||||
|
ha_enabled_key = 'Ensure-My-HA'
|
||||||
|
|
||||||
|
self.override_config('ha_enabled_instance_metadata_key',
|
||||||
|
ha_enabled_key, 'instance_failure')
|
||||||
|
|
||||||
|
# create test data with custom ha_enabled_key
|
||||||
|
self.fake_client.servers.create(self.instance_id,
|
||||||
|
host="fake-host",
|
||||||
|
ha_enabled=True,
|
||||||
|
ha_enabled_key=ha_enabled_key)
|
||||||
|
|
||||||
|
# test StopInstanceTask
|
||||||
|
self._test_stop_instance()
|
||||||
|
|
||||||
|
# test StartInstanceTask
|
||||||
|
task = instance_failure.StartInstanceTask(self.ctxt, self.novaclient)
|
||||||
|
task.execute(self.instance_id)
|
||||||
|
|
||||||
|
# test ConfirmInstanceActiveTask
|
||||||
|
self._test_confirm_instance_is_active()
|
||||||
|
|
||||||
|
# verify progress details
|
||||||
|
_mock_notify.assert_has_calls([
|
||||||
|
mock.call('Stopping instance: ' + self.instance_id),
|
||||||
|
mock.call("Stopped instance: '" + self.instance_id + "'", 1.0),
|
||||||
|
mock.call("Starting instance: '" + self.instance_id + "'"),
|
||||||
|
mock.call("Instance started: '" + self.instance_id + "'", 1.0),
|
||||||
|
mock.call("Confirming instance '" + self.instance_id +
|
||||||
|
"' vm_state is ACTIVE"),
|
||||||
|
mock.call("Confirmed instance '" + self.instance_id +
|
||||||
|
"' vm_state is ACTIVE", 1.0)
|
||||||
|
])
|
||||||
|
|
||||||
@mock.patch('masakari.compute.nova.novaclient')
|
@mock.patch('masakari.compute.nova.novaclient')
|
||||||
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
|
||||||
'update_details')
|
'update_details')
|
||||||
|
@ -41,7 +41,7 @@ class FakeNovaClient(object):
|
|||||||
class Server(object):
|
class Server(object):
|
||||||
def __init__(self, id=None, uuid=None, host=None, vm_state=None,
|
def __init__(self, id=None, uuid=None, host=None, vm_state=None,
|
||||||
task_state=None, power_state=1, ha_enabled=None,
|
task_state=None, power_state=1, ha_enabled=None,
|
||||||
locked=False):
|
ha_enabled_key='HA_Enabled', locked=False):
|
||||||
self.id = id
|
self.id = id
|
||||||
self.uuid = uuid or uuidutils.generate_uuid()
|
self.uuid = uuid or uuidutils.generate_uuid()
|
||||||
self.host = host
|
self.host = host
|
||||||
@ -49,7 +49,7 @@ class FakeNovaClient(object):
|
|||||||
setattr(self, 'OS-EXT-STS:vm_state', vm_state)
|
setattr(self, 'OS-EXT-STS:vm_state', vm_state)
|
||||||
setattr(self, 'OS-EXT-STS:task_state', task_state)
|
setattr(self, 'OS-EXT-STS:task_state', task_state)
|
||||||
setattr(self, 'OS-EXT-STS:power_state', power_state)
|
setattr(self, 'OS-EXT-STS:power_state', power_state)
|
||||||
self.metadata = {"HA_Enabled": ha_enabled}
|
self.metadata = {ha_enabled_key: ha_enabled}
|
||||||
self.locked = locked
|
self.locked = locked
|
||||||
|
|
||||||
class ServerManager(object):
|
class ServerManager(object):
|
||||||
@ -59,12 +59,14 @@ class FakeNovaClient(object):
|
|||||||
self.stop_calls = []
|
self.stop_calls = []
|
||||||
|
|
||||||
def create(self, id, uuid=None, host=None, vm_state='active',
|
def create(self, id, uuid=None, host=None, vm_state='active',
|
||||||
task_state=None, power_state=1, ha_enabled=False):
|
task_state=None, power_state=1, ha_enabled=False,
|
||||||
|
ha_enabled_key='HA_Enabled'):
|
||||||
server = FakeNovaClient.Server(id=id, uuid=uuid, host=host,
|
server = FakeNovaClient.Server(id=id, uuid=uuid, host=host,
|
||||||
vm_state=vm_state,
|
vm_state=vm_state,
|
||||||
task_state=task_state,
|
task_state=task_state,
|
||||||
power_state=power_state,
|
power_state=power_state,
|
||||||
ha_enabled=ha_enabled)
|
ha_enabled=ha_enabled,
|
||||||
|
ha_enabled_key=ha_enabled_key)
|
||||||
self._servers.append(server)
|
self._servers.append(server)
|
||||||
return server
|
return server
|
||||||
|
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Adds ``ha_enabled_instance_metadata_key`` config option to ``host_failure``
|
||||||
|
and ``instance_failure`` config groups. This option allows operators to
|
||||||
|
override the default ``HA_Enabled`` instance metadata key which controls
|
||||||
|
the behaviour of Masakari towards the instance. This way one can have
|
||||||
|
different keys for different failure types (host vs instance failures).
|
Loading…
x
Reference in New Issue
Block a user