diff --git a/masakari/compute/nova.py b/masakari/compute/nova.py index ed8692ae..fa54319b 100644 --- a/masakari/compute/nova.py +++ b/masakari/compute/nova.py @@ -206,3 +206,20 @@ class API(object): msg = (_LI('Call start server command for instance %(uuid)s')) LOG.info(msg, {'uuid': uuid}) return nova.servers.start(uuid) + + @translate_nova_exception + def get_aggregate_list(self, context): + """Get all aggregate list.""" + nova = novaclient(context) + LOG.info(_LI('Call aggregate-list command to get list of all ' + 'aggregates.')) + return nova.aggregates.list() + + @translate_nova_exception + def add_host_to_aggregate(self, context, host, aggregate): + """Add host to given aggregate.""" + nova = novaclient(context) + msg = _LI("Call add_host command to add host '%(host_name)s' to " + "aggregate '%(aggregate_name)s'.") + LOG.info(msg, {'host_name': host, 'aggregate_name': aggregate}) + return nova.aggregates.add_host(aggregate, host) diff --git a/masakari/conf/engine_driver.py b/masakari/conf/engine_driver.py index 0c8f289d..376df76b 100644 --- a/masakari/conf/engine_driver.py +++ b/masakari/conf/engine_driver.py @@ -38,6 +38,14 @@ from a failed source compute node. First preference will be given to those instances which contain 'HA_Enabled=True' metadata key, and then it will evacuate the remaining ones. When set to False, it will evacuate only those instances which contain 'HA_Enabled=True' metadata key."""), + + cfg.BoolOpt("add_reserved_host_to_aggregate", + default=False, + help=""" +Operators can decide whether reserved_host should be added to aggregate group +of failed compute host. When set to True, reserved host will be added to the +aggregate group of failed compute host. When set to False, the reserved_host +will not be added to the aggregate group of failed compute host."""), ] instance_failure_options = [ diff --git a/masakari/engine/drivers/taskflow/host_failure.py b/masakari/engine/drivers/taskflow/host_failure.py index c02867fb..28a5db80 100644 --- a/masakari/engine/drivers/taskflow/host_failure.py +++ b/masakari/engine/drivers/taskflow/host_failure.py @@ -90,14 +90,30 @@ class EvacuateInstancesTask(base.MasakariTask): default_provides = set(["instance_list"]) def __init__(self, novaclient): - requires = ["instance_list"] + requires = ["host_name", "instance_list"] super(EvacuateInstancesTask, self).__init__(addons=[ACTION], requires=requires) self.novaclient = novaclient - def execute(self, context, instance_list, reserved_host=None): - def _do_evacuate(context, instance_list, reserved_host=None): + def execute(self, context, host_name, instance_list, reserved_host=None): + def _do_evacuate(context, host_name, instance_list, + reserved_host=None): if reserved_host: + if CONF.host_failure.add_reserved_host_to_aggregate: + # Assign reserved_host to an aggregate to which the failed + # compute host belongs to. + aggregates = self.novaclient.get_aggregate_list(context) + for aggregate in aggregates: + if host_name in aggregate.hosts: + self.novaclient.add_host_to_aggregate( + context, reserved_host.name, aggregate.name) + # A failed compute host can be associated with + # multiple aggregates but operators will not + # associate it with multiple aggregates in real + # deployment so adding reserved_host to the very + # first aggregate from the list. + break + self.novaclient.enable_disable_service( context, reserved_host.name, enable=True) @@ -131,17 +147,18 @@ class EvacuateInstancesTask(base.MasakariTask): lock_name = reserved_host.name if reserved_host else None @utils.synchronized(lock_name) - def do_evacuate_with_reserved_host(context, instance_list, + def do_evacuate_with_reserved_host(context, host_name, instance_list, reserved_host): - _do_evacuate(context, instance_list, reserved_host=reserved_host) + _do_evacuate(context, host_name, instance_list, + reserved_host=reserved_host) if lock_name: - do_evacuate_with_reserved_host(context, instance_list, + do_evacuate_with_reserved_host(context, host_name, instance_list, reserved_host) else: # No need to acquire lock on reserved_host when recovery_method is # 'auto' as the selection of compute host will be decided by nova. - _do_evacuate(context, instance_list) + _do_evacuate(context, host_name, instance_list) return { "instance_list": instance_list, diff --git a/masakari/tests/unit/compute/test_nova.py b/masakari/tests/unit/compute/test_nova.py index ac6472ad..f58191c6 100644 --- a/masakari/tests/unit/compute/test_nova.py +++ b/masakari/tests/unit/compute/test_nova.py @@ -238,3 +238,23 @@ class NovaApiTestCase(test.TestCase): mock_novaclient.assert_called_once_with(self.ctx) mock_servers.start.assert_called_once_with(uuidsentinel.fake_server) + + @mock.patch('masakari.compute.nova.novaclient') + def test_get_aggregate_list(self, mock_novaclient): + mock_aggregates = mock.MagicMock() + mock_novaclient.return_value = mock.MagicMock( + aggregates=mock_aggregates) + self.api.get_aggregate_list(self.ctx) + + mock_novaclient.assert_called_once_with(self.ctx) + self.assertTrue(mock_aggregates.list.called) + + @mock.patch('masakari.compute.nova.novaclient') + def test_add_host_to_aggregate(self, mock_novaclient): + mock_aggregates = mock.MagicMock() + mock_novaclient.return_value = mock.MagicMock( + aggregates=mock_aggregates) + self.api.add_host_to_aggregate(self.ctx, 'fake_host', 'fake_aggregate') + mock_novaclient.assert_called_once_with(self.ctx) + mock_aggregates.add_host.assert_called_once_with( + 'fake_aggregate', 'fake_host') diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py index 2fa96065..f4f76763 100644 --- a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py +++ b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py @@ -89,7 +89,7 @@ class HostFailureTestCase(test.TestCase): with mock.patch.object( self.novaclient, "enable_disable_service") as mock_enable_disable_service: - instance_list = task.execute(self.ctxt, + instance_list = task.execute(self.ctxt, self.instance_host, instance_list['instance_list'], reserved_host=reserved_host) @@ -97,7 +97,7 @@ class HostFailureTestCase(test.TestCase): self.ctxt, reserved_host.name, enable=True) else: instance_list = task.execute( - self.ctxt, instance_list['instance_list']) + self.ctxt, self.instance_host, instance_list['instance_list']) return instance_list @@ -137,6 +137,8 @@ class HostFailureTestCase(test.TestCase): _mock_novaclient.return_value = self.fake_client self.override_config("evacuate_all_instances", True, "host_failure") + self.override_config("add_reserved_host_to_aggregate", + True, "host_failure") # create test data self.fake_client.servers.create(id="1", host=self.instance_host, @@ -144,6 +146,8 @@ class HostFailureTestCase(test.TestCase): self.fake_client.servers.create(id="2", host=self.instance_host) reserved_host = fakes.create_fake_host(name="fake-reserved-host", reserved=True) + self.fake_client.aggregates.create(id="1", name='fake_agg', + hosts=[self.instance_host]) # execute DisableComputeServiceTask self._test_disable_compute_service() @@ -156,6 +160,8 @@ class HostFailureTestCase(test.TestCase): instance_list = self._evacuate_instances( instance_list, reserved_host=reserved_host) self.assertEqual(1, mock_save.call_count) + self.assertIn(reserved_host.name, + self.fake_client.aggregates.get('fake_agg').hosts) # execute ConfirmEvacuationTask self._test_confirm_evacuate_task(instance_list) @@ -182,7 +188,7 @@ class HostFailureTestCase(test.TestCase): # method is called. with mock.patch.object(fakes.FakeNovaClient.ServerManager, "evacuate") as mock_evacuate: - task.execute(self.ctxt, + task.execute(self.ctxt, self.instance_host, instance_list['instance_list']) self.assertEqual(2, mock_evacuate.call_count) diff --git a/masakari/tests/unit/fakes.py b/masakari/tests/unit/fakes.py index a3180c46..6b8880f5 100644 --- a/masakari/tests/unit/fakes.py +++ b/masakari/tests/unit/fakes.py @@ -80,6 +80,36 @@ class FakeNovaClient(object): server = self.get(id) setattr(server, 'OS-EXT-STS:vm_state', 'active') + class Aggregate(object): + def __init__(self, id=None, uuid=None, name=None, hosts=None): + self.id = id + self.uuid = uuid or uuidutils.generate_uuid() + self.name = name + self.hosts = hosts + + class AggregatesManager(object): + def __init__(self): + self.aggregates = [] + + def create(self, id, uuid=None, name=None, hosts=None): + aggregate = FakeNovaClient.Aggregate(id=id, uuid=uuid, name=name, + hosts=hosts) + self.aggregates.append(aggregate) + return aggregate + + def list(self): + return self.aggregates + + def add_host(self, aggregate_name, host_name): + aggregate = self.get(aggregate_name) + if host_name not in aggregate.hosts: + aggregate.hosts.append(host_name) + + def get(self, name): + for aggregate in self.aggregates: + if aggregate.name == name: + return aggregate + class Service(object): def __init__(self, id=None, host=None, binary=None, status='enabled'): self.id = id @@ -111,6 +141,7 @@ class FakeNovaClient(object): def __init__(self): self.servers = FakeNovaClient.ServerManager() self.services = FakeNovaClient.Services() + self.aggregates = FakeNovaClient.AggregatesManager() def create_fake_notification(type="VM", id=1, payload=None, diff --git a/releasenotes/notes/add_reserved_host_to_aggregates-5f506d08354ec148.yaml b/releasenotes/notes/add_reserved_host_to_aggregates-5f506d08354ec148.yaml new file mode 100644 index 00000000..3e324887 --- /dev/null +++ b/releasenotes/notes/add_reserved_host_to_aggregates-5f506d08354ec148.yaml @@ -0,0 +1,12 @@ +--- +features: + - | + Operators can now decide based on the new config option + 'add_reserved_host_to_aggregate' whether to add or not a reserved_host + to all host aggregates which failed compute host belongs to. + + To use this feature, following config option need to be set under + ``host_failure`` section in 'masakari.conf' file:: + + [host_failure] + add_reserved_host_to_aggregate = True