Skip migrate actions in pre_condition phase

This patch is implementing detection of certain conditions and moving
the action to SKIPPED status so that, execution is not started. We will
check that:

- The instance exists
- The instance is running in the source_node

On other contitions the action will be FAILED in the pre_condition
check:

- Destination node (if specified) does not exists or is disabled
- Instance is not ACTIVE and migration_type is live.

Implements: blueprint skip-actions-in-pre-condition

Assisted-By: claude-code (claude-sonnet-4.5)

Change-Id: I52f6fbe6961a4fb195e601b529110c6c3154c2dc
Signed-off-by: Alfredo Moralejo <amoralej@redhat.com>
This commit is contained in:
Alfredo Moralejo
2025-10-28 09:28:17 +01:00
parent 3c2d7a4b0f
commit c14f9098a6
4 changed files with 245 additions and 8 deletions

View File

@@ -35,3 +35,17 @@ parameter type required description
If not specified, nova-scheduler
will determine the destination
======================== ====== ======== ===================================
Skipping conditions
--------------------
Migrate actions will be automatically skipped in the pre_condition phase in
the following cases:
- The server does not exist
- The server is not running on the source_node
On other conditions the action will be FAILED in the pre_condition check:
- Destination node (if specified) does not exist or is disabled
- The server status is not ACTIVE for live migration

View File

@@ -0,0 +1,17 @@
---
features:
- |
Added a pre-condition check to the migration action to prevent executing
migrations when required criteria are not met. In following conditions,
the action status will be set to SKIPPED:
- Instance is not found
- Instance is not running on source node
On other conditions the action will be FAILED in the pre_condition
check:
- Destination node (if specified) does not exists or is disabled
- Instance is not ACTIVE and migration_type is live.
This improves safety and correctness of migrations.

View File

@@ -199,9 +199,60 @@ class Migrate(base.BaseAction):
raise exception.InstanceNotFound(name=self.instance_uuid)
def pre_condition(self):
# TODO(jed): check if the instance exists / check if the instance is on
# the source_node
pass
"""Check migration preconditions
Skipping conditions:
- Instance does not exist
- Instance is not running on the source_node
Failing conditions:
- Destination node (if specified) does not exist or is disabled
- Instance status is not ACTIVE for live migration
"""
nova = nova_helper.NovaHelper(osc=self.osc)
# Check that the instance exists
try:
instance = nova.find_instance(self.instance_uuid)
except nova_helper.nvexceptions.NotFound:
raise exception.ActionSkipped(
_("Instance %s not found") % self.instance_uuid)
# Check that the instance is running on source_node
instance_host = getattr(instance, 'OS-EXT-SRV-ATTR:host', None)
if instance_host != self.source_node:
raise exception.ActionSkipped(
_("Instance %(instance)s is not running on source node "
"%(source)s (currently on %(current)s)") %
{'instance': self.instance_uuid,
'source': self.source_node,
'current': instance_host})
# Check destination node if specified
if self.destination_node:
try:
# Find the compute node and check if service is enabled
dest_node = nova.get_compute_node_by_hostname(
self.destination_node)
# Check if compute service is enabled
if dest_node.status != 'enabled':
raise exception.ActionExecutionFailure(
_("Destination node %s is not in enabled state") %
self.destination_node)
except exception.ComputeNodeNotFound:
raise exception.ActionExecutionFailure(
_("Destination node %s not found") %
self.destination_node)
# Check instance status based on migration type
instance_status = instance.status
if self.migration_type == self.LIVE_MIGRATION:
if instance_status != 'ACTIVE':
raise exception.ActionExecutionFailure(
_("Live migration requires instance %(instance)s to be "
"in ACTIVE status (current status: %(status)s)") %
{'instance': self.instance_uuid,
'status': instance_status})
def post_condition(self):
# TODO(jed): check extra parameters (network response, etc.)

View File

@@ -17,6 +17,9 @@ from unittest import mock
import jsonschema
from novaclient.v2 import hypervisors
from novaclient.v2 import servers
from watcher.applier.actions import base as baction
from watcher.applier.actions import migration
from watcher.common import clients
@@ -133,11 +136,163 @@ class TestMigration(base.TestCase):
self.assertRaises(jsonschema.ValidationError,
self.action.validate_parameters)
def test_migration_pre_condition(self):
try:
self.action.pre_condition()
except Exception as exc:
self.fail(exc)
def test_migration_pre_condition_success(self):
"""Test successful pre_condition with all checks passing"""
parameters = {baction.BaseAction.RESOURCE_ID: self.INSTANCE_UUID,
'migration_type': 'live',
'source_node': 'compute1-hostname',
'destination_node': 'compute2-hostname'}
self.action.input_parameters = parameters
instance_info = {
'id': self.INSTANCE_UUID,
'status': 'ACTIVE',
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
}
instance = servers.Server(servers.ServerManager, info=instance_info)
compute_node_info = {
'id': 'compute2-hostname',
'status': 'enabled',
'hypervisor_hostname': 'compute1-hostname',
'service': {
'host': 'compute1-hostname'
}
}
compute_node = hypervisors.Hypervisor(
hypervisors.HypervisorManager, info=compute_node_info)
self.m_helper.find_instance.return_value = instance
self.m_helper.get_compute_node_by_hostname.return_value = compute_node
self.action.pre_condition()
def test_pre_condition_instance_not_found(self):
"""Test pre_condition fails when instance doesn't exist"""
self.m_helper.find_instance.side_effect = (
nova_helper.nvexceptions.NotFound('404'))
self.assertRaisesRegex(
exception.ActionSkipped,
f"Instance {self.INSTANCE_UUID} not found",
self.action.pre_condition)
def test_pre_condition_instance_on_wrong_host(self):
"""Test pre_condition fails when instance is on wrong host"""
instance_info = {
'id': self.INSTANCE_UUID,
'status': 'ACTIVE',
'OS-EXT-SRV-ATTR:host': 'wrong-hostname'
}
instance = servers.Server(servers.ServerManager, info=instance_info)
self.m_helper.find_instance.return_value = instance
self.assertRaisesRegex(
exception.ActionSkipped,
f"Instance {self.INSTANCE_UUID} is not running on source node "
r"compute1-hostname \(currently on wrong-hostname\)",
self.action.pre_condition)
def test_pre_condition_destination_node_not_found(self):
"""Test pre_condition fails when destination node doesn't exist"""
instance_info = {
'id': self.INSTANCE_UUID,
'status': 'ACTIVE',
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
}
instance = servers.Server(servers.ServerManager, info=instance_info)
self.m_helper.find_instance.return_value = instance
self.m_helper.get_compute_node_by_hostname.side_effect = (
exception.ComputeNodeNotFound(name='compute2-hostname'))
self.assertRaisesRegex(
exception.ActionExecutionFailure,
"Destination node compute2-hostname not found",
self.action.pre_condition)
def test_pre_condition_destination_node_disabled(self):
"""Test pre_condition fails when destination node is disabled"""
instance_info = {
'id': self.INSTANCE_UUID,
'status': 'ACTIVE',
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
}
instance = servers.Server(servers.ServerManager, info=instance_info)
compute_node_info = {
'id': 'compute2-hostname',
'status': 'disabled',
'hypervisor_hostname': 'compute2-hostname',
'service': {
'host': 'compute2-hostname'
}
}
compute_node = hypervisors.Hypervisor(
hypervisors.HypervisorManager, info=compute_node_info)
self.m_helper.find_instance.return_value = instance
self.m_helper.get_compute_node_by_hostname.return_value = compute_node
self.assertRaisesRegex(
exception.ActionExecutionFailure,
"Destination node compute2-hostname is not in enabled state",
self.action.pre_condition)
def test_pre_condition_live_migration_wrong_status(self):
"""Test pre_condition fails live migration with non-ACTIVE status"""
instance_info = {
'id': self.INSTANCE_UUID,
'status': 'SHUTOFF',
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
}
instance = servers.Server(servers.ServerManager, info=instance_info)
compute_node_info = {
'id': 'compute2-hostname',
'status': 'enabled',
'hypervisor_hostname': 'compute2-hostname',
'service': {
'host': 'compute2-hostname'
}
}
compute_node = hypervisors.Hypervisor(
hypervisors.HypervisorManager, info=compute_node_info)
self.m_helper.find_instance.return_value = instance
self.m_helper.get_compute_node_by_hostname.return_value = compute_node
self.assertRaisesRegex(
exception.ActionExecutionFailure,
f"Live migration requires instance {self.INSTANCE_UUID} to be in "
r"ACTIVE status \(current status: SHUTOFF\)",
self.action.pre_condition)
def test_pre_condition_no_destination_node(self):
"""Test pre_condition with no destination node specified"""
instance_info = {
'id': self.INSTANCE_UUID,
'status': 'ACTIVE',
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
}
instance = servers.Server(servers.ServerManager, info=instance_info)
self.m_helper.find_instance.return_value = instance
# Create action without destination_node
params = {
"migration_type": "live",
"source_node": "compute1-hostname",
"destination_node": None,
baction.BaseAction.RESOURCE_ID: self.INSTANCE_UUID,
}
action = migration.Migrate(mock.Mock())
action.input_parameters = params
action.pre_condition()
# Ensure get_compute_node_by_hostname was not called
self.m_helper.get_compute_node_by_hostname.assert_not_called()
def test_migration_post_condition(self):
try: