diff --git a/watcher/applier/actions/migration.py b/watcher/applier/actions/migration.py index 03a1e2411..14283a6fc 100644 --- a/watcher/applier/actions/migration.py +++ b/watcher/applier/actions/migration.py @@ -165,20 +165,21 @@ class Migrate(base.BaseAction): else: LOG.debug("Migrate instance %s to %s", self.instance_uuid, destination) - instance = nova.find_instance(self.instance_uuid) - if instance: - if self.migration_type == self.LIVE_MIGRATION: - return self._live_migrate_instance(nova, destination) - elif self.migration_type == self.COLD_MIGRATION: - return self._cold_migrate_instance(nova, destination) - else: - raise exception.Invalid( - message=(_("Migration of type '%(migration_type)s' is not " - "supported.") % - {'migration_type': self.migration_type})) - else: + try: + nova.find_instance(self.instance_uuid) + except exception.ComputeResourceNotFound: raise exception.InstanceNotFound(name=self.instance_uuid) + if self.migration_type == self.LIVE_MIGRATION: + return self._live_migrate_instance(nova, destination) + elif self.migration_type == self.COLD_MIGRATION: + return self._cold_migrate_instance(nova, destination) + else: + raise exception.Invalid( + message=(_("Migration of type '%(migration_type)s' is not " + "supported.") % + {'migration_type': self.migration_type})) + def execute(self): return self.migrate(destination=self.destination_node) @@ -187,17 +188,18 @@ class Migrate(base.BaseAction): def abort(self): nova = nova_helper.NovaHelper(osc=self.osc) - instance = nova.find_instance(self.instance_uuid) - if instance: - if self.migration_type == self.COLD_MIGRATION: - return self._abort_cold_migrate(nova) - elif self.migration_type == self.LIVE_MIGRATION: - return self._abort_live_migrate( - nova, source=self.source_node, - destination=self.destination_node) - else: + try: + nova.find_instance(self.instance_uuid) + except exception.ComputeResourceNotFound: raise exception.InstanceNotFound(name=self.instance_uuid) + if self.migration_type == self.COLD_MIGRATION: + return self._abort_cold_migrate(nova) + elif self.migration_type == self.LIVE_MIGRATION: + return self._abort_live_migrate( + nova, source=self.source_node, + destination=self.destination_node) + def pre_condition(self): """Check migration preconditions diff --git a/watcher/applier/actions/resize.py b/watcher/applier/actions/resize.py index 7c10841ea..11766c18e 100644 --- a/watcher/applier/actions/resize.py +++ b/watcher/applier/actions/resize.py @@ -79,17 +79,23 @@ class Resize(base.BaseAction): nova = nova_helper.NovaHelper(osc=self.osc) LOG.debug("Resize instance %s to %s flavor", self.instance_uuid, self.flavor) - instance = nova.find_instance(self.instance_uuid) + try: + nova.find_instance(self.instance_uuid) + except exception.ComputeResourceNotFound: + LOG.warning("Instance %s not found, skipping resize", + self.instance_uuid) + raise exception.InstanceNotFound(name=self.instance_uuid) + result = None - if instance: - try: - result = nova.resize_instance( - instance_id=self.instance_uuid, flavor=self.flavor) - except Exception as exc: - LOG.exception(exc) - LOG.critical( - "Unexpected error occurred. Resizing failed for " - "instance %s.", self.instance_uuid) + try: + result = nova.resize_instance( + instance_id=self.instance_uuid, flavor=self.flavor) + except Exception as exc: + LOG.exception(exc) + LOG.critical( + "Unexpected error occurred. Resizing failed for " + "instance %s.", self.instance_uuid) + return False return result def execute(self): diff --git a/watcher/applier/actions/stop.py b/watcher/applier/actions/stop.py index 925d70c6a..5c78b20f7 100644 --- a/watcher/applier/actions/stop.py +++ b/watcher/applier/actions/stop.py @@ -84,20 +84,21 @@ class Stop(base.BaseAction): return True else: # Check if failure was due to instance not found (idempotent) - instance = nova.find_instance(self.instance_uuid) - if not instance: + try: + nova.find_instance(self.instance_uuid) + except exception.ComputeResourceNotFound: LOG.info( "Instance %(uuid)s not found, " "considering stop operation successful", {'uuid': self.instance_uuid} ) return True - else: - LOG.error( - "Failed to stop instance %(uuid)s", - {'uuid': self.instance_uuid} - ) - return False + + LOG.error( + "Failed to stop instance %(uuid)s", + {'uuid': self.instance_uuid} + ) + return False def execute(self): return self.stop() diff --git a/watcher/applier/actions/volume_migration.py b/watcher/applier/actions/volume_migration.py index 9732c104b..344becedb 100644 --- a/watcher/applier/actions/volume_migration.py +++ b/watcher/applier/actions/volume_migration.py @@ -143,7 +143,15 @@ class VolumeMigrate(base.BaseAction): # since it has attachments we need to validate nova's constraints instance_id = volume.attachments[0]['server_id'] - instance_status = self.nova_util.find_instance(instance_id).status + try: + instance = self.nova_util.find_instance(instance_id) + except exception.ComputeResourceNotFound: + LOG.debug( + "Could not find instance %s, could not determine whether" + "it's safe to migrate", instance_id + ) + return False + instance_status = instance.status LOG.debug( f"volume: {volume.id} is attached to instance: {instance_id} " f"in instance status: {instance_status}") diff --git a/watcher/common/nova_helper.py b/watcher/common/nova_helper.py index a97e8da83..bbe6bf013 100644 --- a/watcher/common/nova_helper.py +++ b/watcher/common/nova_helper.py @@ -629,7 +629,8 @@ class NovaHelper: """Find an instance by its ID. :param instance_id: the UUID of the instance to find - :returns: Server wrapper object if found, None if not found + :returns: Server wrapper object if found + :raises: ComputeResourceNotFound if instance is not found """ instance = self.connection.compute.get_server(instance_id) return Server.from_openstacksdk(instance) diff --git a/watcher/decision_engine/strategy/strategies/zone_migration.py b/watcher/decision_engine/strategy/strategies/zone_migration.py index a887ddded..3453bb5d6 100644 --- a/watcher/decision_engine/strategy/strategies/zone_migration.py +++ b/watcher/decision_engine/strategy/strategies/zone_migration.py @@ -18,6 +18,7 @@ from oslo_utils import timeutils from cinderclient.v3.volumes import Volume from watcher._i18n import _ from watcher.common import cinder_helper +from watcher.common import exception from watcher.common import nova_helper from watcher.decision_engine.model import element from watcher.decision_engine.strategy.strategies import base @@ -444,11 +445,22 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy): # if with_attached_volume is True, migrate instances attached # to the volumes, only if they are possible migration targets if self.with_attached_volume: - instances = [ - self.nova.find_instance(dic.get('server_id')) - for dic in volume.attachments - if dic.get('server_id') in instance_target_ids - ] + instances = [] + for dic in volume.attachments: + instance_id = dic.get('server_id') + try: + instance = self.nova.find_instance(instance_id) + except exception.ComputeResourceNotFound: + # if an instance can't be found, we won't consider it + # for migration, but the audit should not fail because + # of it + LOG.debug( + "Could not find instance %s it will not be " + "considered for migration", instance_id + ) + continue + if instance_id in instance_target_ids: + instances.append(instance) self.instances_migration(instances, action_counter) action_counter.add_pool(pool) diff --git a/watcher/tests/unit/applier/actions/test_migration.py b/watcher/tests/unit/applier/actions/test_migration.py index ca4b601ae..202f15c4c 100644 --- a/watcher/tests/unit/applier/actions/test_migration.py +++ b/watcher/tests/unit/applier/actions/test_migration.py @@ -395,3 +395,32 @@ class TestMigration(test_utils.NovaResourcesMixin, base.TestCase): self.m_helper.abort_live_migrate.assert_called_once_with( instance_id=self.INSTANCE_UUID, source="compute1-hostname", destination="compute2-hostname") + + def test_execute_live_migration_failure_instance_not_found(self): + # Live migration fails but instance doesn't exist (idempotent) + self.m_helper.find_instance.side_effect = ( + exception.ComputeResourceNotFound(self.INSTANCE_UUID)) + self.m_helper.live_migrate_instance.return_value = False + + self.assertRaisesRegex( + exception.InstanceNotFound, + f"The instance '{self.INSTANCE_UUID}' could not be found", + self.action.execute + ) + + # Should check instance existence + self.m_helper.find_instance.assert_called_once_with(self.INSTANCE_UUID) + + def test_execute_cold_migration_failure_instance_not_found(self): + # Cold migration fails but instance doesn't exist (idempotent) + self.m_helper.find_instance.side_effect = ( + exception.ComputeResourceNotFound(self.INSTANCE_UUID)) + self.m_helper.watcher_non_live_migrate_instance.return_value = False + + self.assertRaisesRegex( + exception.InstanceNotFound, + f"The instance '{self.INSTANCE_UUID}' could not be found", + self.action.execute + ) + # Should check instance existence + self.m_helper.find_instance.assert_called_once_with(self.INSTANCE_UUID) diff --git a/watcher/tests/unit/applier/actions/test_resize.py b/watcher/tests/unit/applier/actions/test_resize.py index b672f24b1..4fb3e6df3 100644 --- a/watcher/tests/unit/applier/actions/test_resize.py +++ b/watcher/tests/unit/applier/actions/test_resize.py @@ -116,3 +116,19 @@ class TestResize(base.TestCase): exception.ActionExecutionFailure, "Flavor x1 not found", self.action.pre_condition) + + def test_execute_resize_failure_instance_not_found(self): + # Resize fails but instance doesn't exist (idempotent) + self.r_helper.find_instance.side_effect = ( + exception.ComputeResourceNotFound(self.INSTANCE_UUID) + ) + self.r_helper.resize_instance.return_value = False + + self.assertRaisesRegex( + exception.InstanceNotFound, + f"The instance '{self.INSTANCE_UUID}' could not be found", + self.action.execute + ) + + # Should check instance existence + self.r_helper.find_instance.assert_called_once_with(self.INSTANCE_UUID) diff --git a/watcher/tests/unit/applier/actions/test_stop.py b/watcher/tests/unit/applier/actions/test_stop.py index f406a017a..b09391126 100644 --- a/watcher/tests/unit/applier/actions/test_stop.py +++ b/watcher/tests/unit/applier/actions/test_stop.py @@ -135,7 +135,8 @@ class TestStop(test_utils.NovaResourcesMixin, base.TestCase): def test_execute_stop_failure_instance_not_found(self): # Stop operation fails but instance doesn't exist (idempotent) - self.m_helper.find_instance.return_value = None + self.m_helper.find_instance.side_effect = ( + exception.ComputeResourceNotFound()) self.m_helper.stop_instance.return_value = False result = self.action.execute() diff --git a/watcher/tests/unit/applier/actions/test_volume_migration.py b/watcher/tests/unit/applier/actions/test_volume_migration.py index c83e88ffa..b10ccd693 100644 --- a/watcher/tests/unit/applier/actions/test_volume_migration.py +++ b/watcher/tests/unit/applier/actions/test_volume_migration.py @@ -213,6 +213,15 @@ class TestMigration(base.TestCase): result = self.action_swap._can_swap(volume) self.assertFalse(result) + def test_can_swap_instance_not_found(self): + volume = self.fake_volume( + status='in-use', attachments=[ + {'server_id': TestMigration.INSTANCE_UUID}]) + self.m_n_helper.find_instance.side_effect = ( + exception.ComputeResourceNotFound(TestMigration.INSTANCE_UUID)) + result = self.action_swap._can_swap(volume) + self.assertFalse(result) + def test_swap_success(self): volume = self.fake_volume( status='in-use', attachments=[ diff --git a/watcher/tests/unit/decision_engine/strategy/strategies/test_zone_migration.py b/watcher/tests/unit/decision_engine/strategy/strategies/test_zone_migration.py index 36282bea6..0fcf6b5b3 100644 --- a/watcher/tests/unit/decision_engine/strategy/strategies/test_zone_migration.py +++ b/watcher/tests/unit/decision_engine/strategy/strategies/test_zone_migration.py @@ -17,6 +17,7 @@ from unittest import mock import cinderclient +from watcher.common import exception from watcher.common import nova_helper from watcher.common import utils from watcher.decision_engine.strategy import strategies @@ -1315,6 +1316,147 @@ class TestZoneMigration(test_utils.NovaResourcesMixin, TestBaseStrategy): if item['name'] == "live_instance_migrate_ratio"][0] self.assertEqual(100, live_ind) + def test_execute_mixed_instances_volumes_with_attached_not_found(self): + """Test that the strategy handles properly an instance not found. + + If a instance is missing when checking the volume attachments, the + strategy execution should not fail, just skip that instance. The + instance should still be migrated, but in the regular order, as if + with_attached_volume was False. + """ + server_info = { + "compute_host": "src1", + "name": "INSTANCE_1", + "id": "d010ef1f-dc19-4982-9383-087498bfde03", + "vm_state": "active" + } + instance_on_src1_1 = nova_helper.Server.from_openstacksdk( + self.create_openstacksdk_server(**server_info) + ) + server_info = { + "compute_host": "src2", + "name": "INSTANCE_2", + "id": "d020ef1f-dc19-4982-9383-087498bfde03", + "vm_state": "active" + } + instance_on_src2_2 = nova_helper.Server.from_openstacksdk( + self.create_openstacksdk_server(**server_info) + ) + server_info = { + "compute_host": "src1", + "name": "INSTANCE_3", + "id": "d030ef1f-dc19-4982-9383-087498bfde03", + "vm_state": "active" + } + instance_on_src1_3 = nova_helper.Server.from_openstacksdk( + self.create_openstacksdk_server(**server_info) + ) + self.m_n_helper.get_instance_list.return_value = [ + instance_on_src1_1, + instance_on_src2_2, + instance_on_src1_3 + ] + + volume_on_src1_1 = self.fake_volume(host="src1@back1#pool1", + id=volume_uuid_mapping["volume_1"], + name="volume_1") + volume_on_src1_2 = self.fake_volume(host="src1@back1#pool1", + id=volume_uuid_mapping["volume_2"], + name="volume_2") + volume_on_src2_1 = self.fake_volume(host="src2@back1#pool1", + id=volume_uuid_mapping["volume_3"], + name="volume_3") + self.m_c_helper.get_volume_list.return_value = [ + volume_on_src1_1, + volume_on_src1_2, + volume_on_src2_1, + ] + + volume_on_src1_1.status = 'in-use' + volume_on_src1_1.attachments = [{ + "server_id": "d010ef1f-dc19-4982-9383-087498bfde03", + "attachment_id": "attachment1" + }] + + self.input_parameters["compute_nodes"] = [ + {"src_node": "src1", "dst_node": "dst1"}, + ] + self.input_parameters["storage_pools"] = [ + {"src_pool": "src1@back1#pool1", "dst_pool": "dst1@back1#pool1", + "src_type": "type1", "dst_type": "type1"}, + ] + self.input_parameters["with_attached_volume"] = True + + uuid = "d010ef1f-dc19-4982-9383-087498bfde03" + self.m_n_helper.find_instance.side_effect = ( + exception.ComputeResourceNotFound(uuid) + ) + + solution = self.strategy.execute() + # Check migrations + action_types = collections.Counter( + [action['action_type'] + for action in solution.actions]) + expected_vol_migrations = [ + {'action_type': 'volume_migrate', + 'input_parameters': + {'migration_type': 'migrate', + 'destination_node': 'dst1@back1#pool1', + 'resource_name': 'volume_1', + 'resource_id': '74454247-a064-4b34-8f43-89337987720e'}}, + {'action_type': 'volume_migrate', + 'input_parameters': + {'migration_type': 'migrate', + 'destination_node': 'dst1@back1#pool1', + 'resource_name': 'volume_2', + 'resource_id': 'a16c811e-2521-4fd3-8779-6a94ccb3be73'}} + ] + expected_vm_migrations = [ + {'action_type': 'migrate', + 'input_parameters': + {'migration_type': 'live', + 'source_node': 'src1', + 'destination_node': 'dst1', + 'resource_name': 'INSTANCE_1', + 'resource_id': 'd010ef1f-dc19-4982-9383-087498bfde03'}}, + {'action_type': 'migrate', + 'input_parameters': + {'migration_type': 'live', + 'source_node': 'src1', + 'destination_node': 'dst1', + 'resource_name': 'INSTANCE_3', + 'resource_id': 'd030ef1f-dc19-4982-9383-087498bfde03'}} + ] + migrated_volumes = [action + for action in solution.actions + if action['action_type'] == 'volume_migrate'] + self.assertEqual(2, action_types.get("volume_migrate", 0)) + self.assertEqual(expected_vol_migrations, migrated_volumes) + migrated_vms = [action + for action in solution.actions + if action['action_type'] == 'migrate'] + self.assertEqual(2, action_types.get("migrate", 0)) + self.assertEqual(expected_vm_migrations, migrated_vms) + + self.assertEqual(2, action_types.get("migrate", 0)) + # check that the live migration is the third action, after the + # volume migrations + second_action = solution.actions[2]['input_parameters'] + self.assertEqual(second_action['migration_type'], 'live') + + # All the detached volumes in the pool should be migrated + volume_mig_ind = [item['value'] for item in solution.global_efficacy + if item['name'] == "volume_migrate_ratio"][0] + self.assertEqual(100, volume_mig_ind) + # All the attached volumes in the pool should be swapped + volume_swap_ind = [item['value'] for item in solution.global_efficacy + if item['name'] == "volume_update_ratio"][0] + self.assertEqual(100, volume_swap_ind) + # All the instances in src1 should be migrated + live_ind = [item['value'] for item in solution.global_efficacy + if item['name'] == "live_instance_migrate_ratio"][0] + self.assertEqual(100, live_ind) + def test_instance_migration_exists(self): fake_actions = [