Merge "Fix exception handling for find_instance calls"

This commit is contained in:
Zuul
2026-03-09 15:19:13 +00:00
committed by Gerrit Code Review
11 changed files with 274 additions and 47 deletions

View File

@@ -165,20 +165,21 @@ class Migrate(base.BaseAction):
else:
LOG.debug("Migrate instance %s to %s", self.instance_uuid,
destination)
instance = nova.find_instance(self.instance_uuid)
if instance:
if self.migration_type == self.LIVE_MIGRATION:
return self._live_migrate_instance(nova, destination)
elif self.migration_type == self.COLD_MIGRATION:
return self._cold_migrate_instance(nova, destination)
else:
raise exception.Invalid(
message=(_("Migration of type '%(migration_type)s' is not "
"supported.") %
{'migration_type': self.migration_type}))
else:
try:
nova.find_instance(self.instance_uuid)
except exception.ComputeResourceNotFound:
raise exception.InstanceNotFound(name=self.instance_uuid)
if self.migration_type == self.LIVE_MIGRATION:
return self._live_migrate_instance(nova, destination)
elif self.migration_type == self.COLD_MIGRATION:
return self._cold_migrate_instance(nova, destination)
else:
raise exception.Invalid(
message=(_("Migration of type '%(migration_type)s' is not "
"supported.") %
{'migration_type': self.migration_type}))
def execute(self):
return self.migrate(destination=self.destination_node)
@@ -187,17 +188,18 @@ class Migrate(base.BaseAction):
def abort(self):
nova = nova_helper.NovaHelper(osc=self.osc)
instance = nova.find_instance(self.instance_uuid)
if instance:
if self.migration_type == self.COLD_MIGRATION:
return self._abort_cold_migrate(nova)
elif self.migration_type == self.LIVE_MIGRATION:
return self._abort_live_migrate(
nova, source=self.source_node,
destination=self.destination_node)
else:
try:
nova.find_instance(self.instance_uuid)
except exception.ComputeResourceNotFound:
raise exception.InstanceNotFound(name=self.instance_uuid)
if self.migration_type == self.COLD_MIGRATION:
return self._abort_cold_migrate(nova)
elif self.migration_type == self.LIVE_MIGRATION:
return self._abort_live_migrate(
nova, source=self.source_node,
destination=self.destination_node)
def pre_condition(self):
"""Check migration preconditions

View File

@@ -79,17 +79,23 @@ class Resize(base.BaseAction):
nova = nova_helper.NovaHelper(osc=self.osc)
LOG.debug("Resize instance %s to %s flavor", self.instance_uuid,
self.flavor)
instance = nova.find_instance(self.instance_uuid)
try:
nova.find_instance(self.instance_uuid)
except exception.ComputeResourceNotFound:
LOG.warning("Instance %s not found, skipping resize",
self.instance_uuid)
raise exception.InstanceNotFound(name=self.instance_uuid)
result = None
if instance:
try:
result = nova.resize_instance(
instance_id=self.instance_uuid, flavor=self.flavor)
except Exception as exc:
LOG.exception(exc)
LOG.critical(
"Unexpected error occurred. Resizing failed for "
"instance %s.", self.instance_uuid)
try:
result = nova.resize_instance(
instance_id=self.instance_uuid, flavor=self.flavor)
except Exception as exc:
LOG.exception(exc)
LOG.critical(
"Unexpected error occurred. Resizing failed for "
"instance %s.", self.instance_uuid)
return False
return result
def execute(self):

View File

@@ -84,20 +84,21 @@ class Stop(base.BaseAction):
return True
else:
# Check if failure was due to instance not found (idempotent)
instance = nova.find_instance(self.instance_uuid)
if not instance:
try:
nova.find_instance(self.instance_uuid)
except exception.ComputeResourceNotFound:
LOG.info(
"Instance %(uuid)s not found, "
"considering stop operation successful",
{'uuid': self.instance_uuid}
)
return True
else:
LOG.error(
"Failed to stop instance %(uuid)s",
{'uuid': self.instance_uuid}
)
return False
LOG.error(
"Failed to stop instance %(uuid)s",
{'uuid': self.instance_uuid}
)
return False
def execute(self):
return self.stop()

View File

@@ -143,7 +143,15 @@ class VolumeMigrate(base.BaseAction):
# since it has attachments we need to validate nova's constraints
instance_id = volume.attachments[0]['server_id']
instance_status = self.nova_util.find_instance(instance_id).status
try:
instance = self.nova_util.find_instance(instance_id)
except exception.ComputeResourceNotFound:
LOG.debug(
"Could not find instance %s, could not determine whether"
"it's safe to migrate", instance_id
)
return False
instance_status = instance.status
LOG.debug(
f"volume: {volume.id} is attached to instance: {instance_id} "
f"in instance status: {instance_status}")

View File

@@ -629,7 +629,8 @@ class NovaHelper:
"""Find an instance by its ID.
:param instance_id: the UUID of the instance to find
:returns: Server wrapper object if found, None if not found
:returns: Server wrapper object if found
:raises: ComputeResourceNotFound if instance is not found
"""
instance = self.connection.compute.get_server(instance_id)
return Server.from_openstacksdk(instance)

View File

@@ -18,6 +18,7 @@ from oslo_utils import timeutils
from cinderclient.v3.volumes import Volume
from watcher._i18n import _
from watcher.common import cinder_helper
from watcher.common import exception
from watcher.common import nova_helper
from watcher.decision_engine.model import element
from watcher.decision_engine.strategy.strategies import base
@@ -444,11 +445,22 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy):
# if with_attached_volume is True, migrate instances attached
# to the volumes, only if they are possible migration targets
if self.with_attached_volume:
instances = [
self.nova.find_instance(dic.get('server_id'))
for dic in volume.attachments
if dic.get('server_id') in instance_target_ids
]
instances = []
for dic in volume.attachments:
instance_id = dic.get('server_id')
try:
instance = self.nova.find_instance(instance_id)
except exception.ComputeResourceNotFound:
# if an instance can't be found, we won't consider it
# for migration, but the audit should not fail because
# of it
LOG.debug(
"Could not find instance %s it will not be "
"considered for migration", instance_id
)
continue
if instance_id in instance_target_ids:
instances.append(instance)
self.instances_migration(instances, action_counter)
action_counter.add_pool(pool)

View File

@@ -395,3 +395,32 @@ class TestMigration(test_utils.NovaResourcesMixin, base.TestCase):
self.m_helper.abort_live_migrate.assert_called_once_with(
instance_id=self.INSTANCE_UUID, source="compute1-hostname",
destination="compute2-hostname")
def test_execute_live_migration_failure_instance_not_found(self):
# Live migration fails but instance doesn't exist (idempotent)
self.m_helper.find_instance.side_effect = (
exception.ComputeResourceNotFound(self.INSTANCE_UUID))
self.m_helper.live_migrate_instance.return_value = False
self.assertRaisesRegex(
exception.InstanceNotFound,
f"The instance '{self.INSTANCE_UUID}' could not be found",
self.action.execute
)
# Should check instance existence
self.m_helper.find_instance.assert_called_once_with(self.INSTANCE_UUID)
def test_execute_cold_migration_failure_instance_not_found(self):
# Cold migration fails but instance doesn't exist (idempotent)
self.m_helper.find_instance.side_effect = (
exception.ComputeResourceNotFound(self.INSTANCE_UUID))
self.m_helper.watcher_non_live_migrate_instance.return_value = False
self.assertRaisesRegex(
exception.InstanceNotFound,
f"The instance '{self.INSTANCE_UUID}' could not be found",
self.action.execute
)
# Should check instance existence
self.m_helper.find_instance.assert_called_once_with(self.INSTANCE_UUID)

View File

@@ -116,3 +116,19 @@ class TestResize(base.TestCase):
exception.ActionExecutionFailure,
"Flavor x1 not found",
self.action.pre_condition)
def test_execute_resize_failure_instance_not_found(self):
# Resize fails but instance doesn't exist (idempotent)
self.r_helper.find_instance.side_effect = (
exception.ComputeResourceNotFound(self.INSTANCE_UUID)
)
self.r_helper.resize_instance.return_value = False
self.assertRaisesRegex(
exception.InstanceNotFound,
f"The instance '{self.INSTANCE_UUID}' could not be found",
self.action.execute
)
# Should check instance existence
self.r_helper.find_instance.assert_called_once_with(self.INSTANCE_UUID)

View File

@@ -135,7 +135,8 @@ class TestStop(test_utils.NovaResourcesMixin, base.TestCase):
def test_execute_stop_failure_instance_not_found(self):
# Stop operation fails but instance doesn't exist (idempotent)
self.m_helper.find_instance.return_value = None
self.m_helper.find_instance.side_effect = (
exception.ComputeResourceNotFound())
self.m_helper.stop_instance.return_value = False
result = self.action.execute()

View File

@@ -213,6 +213,15 @@ class TestMigration(base.TestCase):
result = self.action_swap._can_swap(volume)
self.assertFalse(result)
def test_can_swap_instance_not_found(self):
volume = self.fake_volume(
status='in-use', attachments=[
{'server_id': TestMigration.INSTANCE_UUID}])
self.m_n_helper.find_instance.side_effect = (
exception.ComputeResourceNotFound(TestMigration.INSTANCE_UUID))
result = self.action_swap._can_swap(volume)
self.assertFalse(result)
def test_swap_success(self):
volume = self.fake_volume(
status='in-use', attachments=[

View File

@@ -17,6 +17,7 @@ from unittest import mock
import cinderclient
from watcher.common import exception
from watcher.common import nova_helper
from watcher.common import utils
from watcher.decision_engine.strategy import strategies
@@ -1315,6 +1316,147 @@ class TestZoneMigration(test_utils.NovaResourcesMixin, TestBaseStrategy):
if item['name'] == "live_instance_migrate_ratio"][0]
self.assertEqual(100, live_ind)
def test_execute_mixed_instances_volumes_with_attached_not_found(self):
"""Test that the strategy handles properly an instance not found.
If a instance is missing when checking the volume attachments, the
strategy execution should not fail, just skip that instance. The
instance should still be migrated, but in the regular order, as if
with_attached_volume was False.
"""
server_info = {
"compute_host": "src1",
"name": "INSTANCE_1",
"id": "d010ef1f-dc19-4982-9383-087498bfde03",
"vm_state": "active"
}
instance_on_src1_1 = nova_helper.Server.from_openstacksdk(
self.create_openstacksdk_server(**server_info)
)
server_info = {
"compute_host": "src2",
"name": "INSTANCE_2",
"id": "d020ef1f-dc19-4982-9383-087498bfde03",
"vm_state": "active"
}
instance_on_src2_2 = nova_helper.Server.from_openstacksdk(
self.create_openstacksdk_server(**server_info)
)
server_info = {
"compute_host": "src1",
"name": "INSTANCE_3",
"id": "d030ef1f-dc19-4982-9383-087498bfde03",
"vm_state": "active"
}
instance_on_src1_3 = nova_helper.Server.from_openstacksdk(
self.create_openstacksdk_server(**server_info)
)
self.m_n_helper.get_instance_list.return_value = [
instance_on_src1_1,
instance_on_src2_2,
instance_on_src1_3
]
volume_on_src1_1 = self.fake_volume(host="src1@back1#pool1",
id=volume_uuid_mapping["volume_1"],
name="volume_1")
volume_on_src1_2 = self.fake_volume(host="src1@back1#pool1",
id=volume_uuid_mapping["volume_2"],
name="volume_2")
volume_on_src2_1 = self.fake_volume(host="src2@back1#pool1",
id=volume_uuid_mapping["volume_3"],
name="volume_3")
self.m_c_helper.get_volume_list.return_value = [
volume_on_src1_1,
volume_on_src1_2,
volume_on_src2_1,
]
volume_on_src1_1.status = 'in-use'
volume_on_src1_1.attachments = [{
"server_id": "d010ef1f-dc19-4982-9383-087498bfde03",
"attachment_id": "attachment1"
}]
self.input_parameters["compute_nodes"] = [
{"src_node": "src1", "dst_node": "dst1"},
]
self.input_parameters["storage_pools"] = [
{"src_pool": "src1@back1#pool1", "dst_pool": "dst1@back1#pool1",
"src_type": "type1", "dst_type": "type1"},
]
self.input_parameters["with_attached_volume"] = True
uuid = "d010ef1f-dc19-4982-9383-087498bfde03"
self.m_n_helper.find_instance.side_effect = (
exception.ComputeResourceNotFound(uuid)
)
solution = self.strategy.execute()
# Check migrations
action_types = collections.Counter(
[action['action_type']
for action in solution.actions])
expected_vol_migrations = [
{'action_type': 'volume_migrate',
'input_parameters':
{'migration_type': 'migrate',
'destination_node': 'dst1@back1#pool1',
'resource_name': 'volume_1',
'resource_id': '74454247-a064-4b34-8f43-89337987720e'}},
{'action_type': 'volume_migrate',
'input_parameters':
{'migration_type': 'migrate',
'destination_node': 'dst1@back1#pool1',
'resource_name': 'volume_2',
'resource_id': 'a16c811e-2521-4fd3-8779-6a94ccb3be73'}}
]
expected_vm_migrations = [
{'action_type': 'migrate',
'input_parameters':
{'migration_type': 'live',
'source_node': 'src1',
'destination_node': 'dst1',
'resource_name': 'INSTANCE_1',
'resource_id': 'd010ef1f-dc19-4982-9383-087498bfde03'}},
{'action_type': 'migrate',
'input_parameters':
{'migration_type': 'live',
'source_node': 'src1',
'destination_node': 'dst1',
'resource_name': 'INSTANCE_3',
'resource_id': 'd030ef1f-dc19-4982-9383-087498bfde03'}}
]
migrated_volumes = [action
for action in solution.actions
if action['action_type'] == 'volume_migrate']
self.assertEqual(2, action_types.get("volume_migrate", 0))
self.assertEqual(expected_vol_migrations, migrated_volumes)
migrated_vms = [action
for action in solution.actions
if action['action_type'] == 'migrate']
self.assertEqual(2, action_types.get("migrate", 0))
self.assertEqual(expected_vm_migrations, migrated_vms)
self.assertEqual(2, action_types.get("migrate", 0))
# check that the live migration is the third action, after the
# volume migrations
second_action = solution.actions[2]['input_parameters']
self.assertEqual(second_action['migration_type'], 'live')
# All the detached volumes in the pool should be migrated
volume_mig_ind = [item['value'] for item in solution.global_efficacy
if item['name'] == "volume_migrate_ratio"][0]
self.assertEqual(100, volume_mig_ind)
# All the attached volumes in the pool should be swapped
volume_swap_ind = [item['value'] for item in solution.global_efficacy
if item['name'] == "volume_update_ratio"][0]
self.assertEqual(100, volume_swap_ind)
# All the instances in src1 should be migrated
live_ind = [item['value'] for item in solution.global_efficacy
if item['name'] == "live_instance_migrate_ratio"][0]
self.assertEqual(100, live_ind)
def test_instance_migration_exists(self):
fake_actions = [