Merge "Make 'error' instances recovery configurable"
This commit is contained in:
commit
93556d3116
@ -39,6 +39,15 @@ instances which contain 'HA_Enabled=True' metadata key, and then it will
|
||||
evacuate the remaining ones. When set to False, it will evacuate only those
|
||||
instances which contain 'HA_Enabled=True' metadata key."""),
|
||||
|
||||
cfg.BoolOpt('ignore_instances_in_error_state',
|
||||
default=False,
|
||||
help="""
|
||||
Operators can decide whether error instances should be allowed for evacuation
|
||||
from a failed source compute node or not. When set to True, it will ignore
|
||||
error instances from evacuation from a failed source compute node. When set to
|
||||
False, it will evacuate error instances along with other instances from a
|
||||
failed source compute node."""),
|
||||
|
||||
cfg.BoolOpt("add_reserved_host_to_aggregate",
|
||||
default=False,
|
||||
help="""
|
||||
|
@ -70,17 +70,35 @@ class PrepareHAEnabledInstancesTask(base.MasakariTask):
|
||||
self.novaclient = novaclient
|
||||
|
||||
def execute(self, context, host_name):
|
||||
def _filter_instances(instance_list):
|
||||
ha_enabled_instances = []
|
||||
non_ha_enabled_instances = []
|
||||
|
||||
for instance in instance_list:
|
||||
is_instance_ha_enabled = strutils.bool_from_string(
|
||||
instance.metadata.get('HA_Enabled', False))
|
||||
if CONF.host_failure.ignore_instances_in_error_state and (
|
||||
getattr(instance, "OS-EXT-STS:vm_state") == "error"):
|
||||
if is_instance_ha_enabled:
|
||||
msg = ("Ignoring recovery of HA_Enabled instance "
|
||||
"'%(instance_id)s' as it is in 'error' state.")
|
||||
LOG.info(msg, {'instance_id': instance.id})
|
||||
continue
|
||||
|
||||
if is_instance_ha_enabled:
|
||||
ha_enabled_instances.append(instance)
|
||||
else:
|
||||
non_ha_enabled_instances.append(instance)
|
||||
|
||||
if CONF.host_failure.evacuate_all_instances:
|
||||
ha_enabled_instances.extend(non_ha_enabled_instances)
|
||||
|
||||
return ha_enabled_instances
|
||||
|
||||
instance_list = self.novaclient.get_servers(context, host_name)
|
||||
|
||||
if CONF.host_failure.evacuate_all_instances:
|
||||
instance_list = sorted(
|
||||
instance_list, key=lambda k: strutils.bool_from_string(
|
||||
k.metadata.get('HA_Enabled', False)), reverse=True)
|
||||
else:
|
||||
instance_list = (
|
||||
[instance for instance in instance_list if
|
||||
strutils.bool_from_string(instance.metadata.get('HA_Enabled',
|
||||
False))])
|
||||
instance_list = _filter_instances(instance_list)
|
||||
|
||||
if not instance_list:
|
||||
msg = _('No instances to evacuate on host: %s.') % host_name
|
||||
LOG.info(msg)
|
||||
|
@ -53,15 +53,21 @@ class HostFailureTestCase(test.TestCase):
|
||||
self.novaclient = nova.API()
|
||||
self.fake_client = fakes.FakeNovaClient()
|
||||
|
||||
def _verify_instance_evacuated(self):
|
||||
for server in self.novaclient.get_servers(self.ctxt,
|
||||
self.instance_host):
|
||||
def _verify_instance_evacuated(self, old_instance_list):
|
||||
for server in old_instance_list:
|
||||
instance = self.novaclient.get_server(self.ctxt, server.id)
|
||||
self.assertIn(getattr(instance, 'OS-EXT-STS:vm_state'),
|
||||
['active', 'stopped', 'error'])
|
||||
self.assertNotEqual(self.instance_host,
|
||||
getattr(instance,
|
||||
'OS-EXT-SRV-ATTR:hypervisor_hostname'))
|
||||
|
||||
if CONF.host_failure.ignore_instances_in_error_state and getattr(
|
||||
server, 'OS-EXT-STS:vm_state') == 'error':
|
||||
self.assertEqual(
|
||||
self.instance_host, getattr(
|
||||
instance, 'OS-EXT-SRV-ATTR:hypervisor_hostname'))
|
||||
else:
|
||||
self.assertNotEqual(
|
||||
self.instance_host, getattr(
|
||||
instance, 'OS-EXT-SRV-ATTR:hypervisor_hostname'))
|
||||
|
||||
def _test_disable_compute_service(self, mock_enable_disable):
|
||||
task = host_failure.DisableComputeServiceTask(self.novaclient)
|
||||
@ -70,24 +76,27 @@ class HostFailureTestCase(test.TestCase):
|
||||
mock_enable_disable.assert_called_once_with(
|
||||
self.ctxt, self.instance_host)
|
||||
|
||||
def _test_instance_list(self):
|
||||
def _test_instance_list(self, instances_evacuation_count):
|
||||
task = host_failure.PrepareHAEnabledInstancesTask(self.novaclient)
|
||||
instance_list = task.execute(
|
||||
self.ctxt, self.instance_host)
|
||||
evacuate_all_instances = CONF.host_failure.evacuate_all_instances
|
||||
instance_list = task.execute(self.ctxt, self.instance_host)
|
||||
|
||||
if evacuate_all_instances:
|
||||
self.assertEqual(len(self.fake_client.servers.list()),
|
||||
len(instance_list['instance_list']))
|
||||
else:
|
||||
for instance in instance_list['instance_list']:
|
||||
for instance in instance_list['instance_list']:
|
||||
if CONF.host_failure.ignore_instances_in_error_state:
|
||||
self.assertNotEqual("error",
|
||||
getattr(instance, "OS-EXT-STS:vm_state"))
|
||||
if not CONF.host_failure.evacuate_all_instances:
|
||||
self.assertTrue(instance.metadata.get('HA_Enabled', False))
|
||||
|
||||
self.assertEqual(instances_evacuation_count,
|
||||
len(instance_list['instance_list']))
|
||||
|
||||
return instance_list
|
||||
|
||||
def _evacuate_instances(self, instance_list, mock_enable_disable,
|
||||
reserved_host=None):
|
||||
task = host_failure.EvacuateInstancesTask(self.novaclient)
|
||||
old_instance_list = copy.deepcopy(instance_list['instance_list'])
|
||||
|
||||
if reserved_host:
|
||||
task.execute(self.ctxt, self.instance_host,
|
||||
instance_list['instance_list'],
|
||||
@ -99,7 +108,7 @@ class HostFailureTestCase(test.TestCase):
|
||||
self.ctxt, self.instance_host, instance_list['instance_list'])
|
||||
|
||||
# make sure instance is active and has different host
|
||||
self._verify_instance_evacuated()
|
||||
self._verify_instance_evacuated(old_instance_list)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_for_auto_recovery(
|
||||
@ -118,7 +127,7 @@ class HostFailureTestCase(test.TestCase):
|
||||
self._test_disable_compute_service(mock_enable_disable)
|
||||
|
||||
# execute PrepareHAEnabledInstancesTask
|
||||
instance_list = self._test_instance_list()
|
||||
instance_list = self._test_instance_list(2)
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
@ -146,7 +155,7 @@ class HostFailureTestCase(test.TestCase):
|
||||
self._test_disable_compute_service(mock_enable_disable)
|
||||
|
||||
# execute PrepareHAEnabledInstancesTask
|
||||
instance_list = self._test_instance_list()
|
||||
instance_list = self._test_instance_list(2)
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
with mock.patch.object(host_obj.Host, "save") as mock_save:
|
||||
@ -182,6 +191,50 @@ class HostFailureTestCase(test.TestCase):
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_ignore_error_instances(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
self.override_config("ignore_instances_in_error_state",
|
||||
True, "host_failure")
|
||||
self.override_config("evacuate_all_instances",
|
||||
True, "host_failure")
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create ha_enabled test data
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state='error',
|
||||
ha_enabled=True)
|
||||
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||
vm_state='active',
|
||||
ha_enabled=True)
|
||||
|
||||
# execute PrepareHAEnabledInstancesTask
|
||||
instance_list = self._test_instance_list(1)
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_ignore_error_instances_raise_skip_host_recovery(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
self.override_config("ignore_instances_in_error_state",
|
||||
True, "host_failure")
|
||||
self.override_config("evacuate_all_instances",
|
||||
False, "host_failure")
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create ha_enabled test data
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state='error',
|
||||
ha_enabled=True)
|
||||
|
||||
# execute PrepareHAEnabledInstancesTask
|
||||
task = host_failure.PrepareHAEnabledInstancesTask(self.novaclient)
|
||||
self.assertRaises(exception.SkipHostRecoveryException, task.execute,
|
||||
self.ctxt, self.instance_host)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_all_instances_active_resized_instance(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
|
@ -0,0 +1,17 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Operators can decide whether error instances should be allowed for
|
||||
evacuation along with other instances from a failed source compute node
|
||||
or not. Added a new config option ``ignore_instances_in_error_state`` to
|
||||
achieve this. When set to True, masakari will skip the recovery of error
|
||||
instances otherwise it will evacuate error instances as well from a failed
|
||||
source compute node.
|
||||
|
||||
To use this feature, following config option need to be set under
|
||||
``host_failure`` section in 'masakari.conf' file::
|
||||
|
||||
[host_failure]
|
||||
ignore_instances_in_error_state = False
|
||||
|
||||
The default value for this config option is set to False.
|
Loading…
Reference in New Issue
Block a user