Skip migrate actions in pre_condition phase
This patch is implementing detection of certain conditions and moving the action to SKIPPED status so that, execution is not started. We will check that: - The instance exists - The instance is running in the source_node On other contitions the action will be FAILED in the pre_condition check: - Destination node (if specified) does not exists or is disabled - Instance is not ACTIVE and migration_type is live. Implements: blueprint skip-actions-in-pre-condition Assisted-By: claude-code (claude-sonnet-4.5) Change-Id: I52f6fbe6961a4fb195e601b529110c6c3154c2dc Signed-off-by: Alfredo Moralejo <amoralej@redhat.com>
This commit is contained in:
@@ -35,3 +35,17 @@ parameter type required description
|
||||
If not specified, nova-scheduler
|
||||
will determine the destination
|
||||
======================== ====== ======== ===================================
|
||||
|
||||
Skipping conditions
|
||||
--------------------
|
||||
|
||||
Migrate actions will be automatically skipped in the pre_condition phase in
|
||||
the following cases:
|
||||
|
||||
- The server does not exist
|
||||
- The server is not running on the source_node
|
||||
|
||||
On other conditions the action will be FAILED in the pre_condition check:
|
||||
|
||||
- Destination node (if specified) does not exist or is disabled
|
||||
- The server status is not ACTIVE for live migration
|
||||
|
||||
17
releasenotes/notes/migration-precondition-check-ff55be4.yaml
Normal file
17
releasenotes/notes/migration-precondition-check-ff55be4.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Added a pre-condition check to the migration action to prevent executing
|
||||
migrations when required criteria are not met. In following conditions,
|
||||
the action status will be set to SKIPPED:
|
||||
|
||||
- Instance is not found
|
||||
- Instance is not running on source node
|
||||
|
||||
On other conditions the action will be FAILED in the pre_condition
|
||||
check:
|
||||
|
||||
- Destination node (if specified) does not exists or is disabled
|
||||
- Instance is not ACTIVE and migration_type is live.
|
||||
|
||||
This improves safety and correctness of migrations.
|
||||
@@ -199,9 +199,60 @@ class Migrate(base.BaseAction):
|
||||
raise exception.InstanceNotFound(name=self.instance_uuid)
|
||||
|
||||
def pre_condition(self):
|
||||
# TODO(jed): check if the instance exists / check if the instance is on
|
||||
# the source_node
|
||||
pass
|
||||
"""Check migration preconditions
|
||||
|
||||
Skipping conditions:
|
||||
- Instance does not exist
|
||||
- Instance is not running on the source_node
|
||||
Failing conditions:
|
||||
- Destination node (if specified) does not exist or is disabled
|
||||
- Instance status is not ACTIVE for live migration
|
||||
"""
|
||||
nova = nova_helper.NovaHelper(osc=self.osc)
|
||||
|
||||
# Check that the instance exists
|
||||
try:
|
||||
instance = nova.find_instance(self.instance_uuid)
|
||||
except nova_helper.nvexceptions.NotFound:
|
||||
raise exception.ActionSkipped(
|
||||
_("Instance %s not found") % self.instance_uuid)
|
||||
|
||||
# Check that the instance is running on source_node
|
||||
instance_host = getattr(instance, 'OS-EXT-SRV-ATTR:host', None)
|
||||
if instance_host != self.source_node:
|
||||
raise exception.ActionSkipped(
|
||||
_("Instance %(instance)s is not running on source node "
|
||||
"%(source)s (currently on %(current)s)") %
|
||||
{'instance': self.instance_uuid,
|
||||
'source': self.source_node,
|
||||
'current': instance_host})
|
||||
|
||||
# Check destination node if specified
|
||||
if self.destination_node:
|
||||
try:
|
||||
# Find the compute node and check if service is enabled
|
||||
dest_node = nova.get_compute_node_by_hostname(
|
||||
self.destination_node)
|
||||
|
||||
# Check if compute service is enabled
|
||||
if dest_node.status != 'enabled':
|
||||
raise exception.ActionExecutionFailure(
|
||||
_("Destination node %s is not in enabled state") %
|
||||
self.destination_node)
|
||||
except exception.ComputeNodeNotFound:
|
||||
raise exception.ActionExecutionFailure(
|
||||
_("Destination node %s not found") %
|
||||
self.destination_node)
|
||||
|
||||
# Check instance status based on migration type
|
||||
instance_status = instance.status
|
||||
if self.migration_type == self.LIVE_MIGRATION:
|
||||
if instance_status != 'ACTIVE':
|
||||
raise exception.ActionExecutionFailure(
|
||||
_("Live migration requires instance %(instance)s to be "
|
||||
"in ACTIVE status (current status: %(status)s)") %
|
||||
{'instance': self.instance_uuid,
|
||||
'status': instance_status})
|
||||
|
||||
def post_condition(self):
|
||||
# TODO(jed): check extra parameters (network response, etc.)
|
||||
|
||||
@@ -17,6 +17,9 @@ from unittest import mock
|
||||
|
||||
import jsonschema
|
||||
|
||||
from novaclient.v2 import hypervisors
|
||||
from novaclient.v2 import servers
|
||||
|
||||
from watcher.applier.actions import base as baction
|
||||
from watcher.applier.actions import migration
|
||||
from watcher.common import clients
|
||||
@@ -133,11 +136,163 @@ class TestMigration(base.TestCase):
|
||||
self.assertRaises(jsonschema.ValidationError,
|
||||
self.action.validate_parameters)
|
||||
|
||||
def test_migration_pre_condition(self):
|
||||
try:
|
||||
self.action.pre_condition()
|
||||
except Exception as exc:
|
||||
self.fail(exc)
|
||||
def test_migration_pre_condition_success(self):
|
||||
"""Test successful pre_condition with all checks passing"""
|
||||
parameters = {baction.BaseAction.RESOURCE_ID: self.INSTANCE_UUID,
|
||||
'migration_type': 'live',
|
||||
'source_node': 'compute1-hostname',
|
||||
'destination_node': 'compute2-hostname'}
|
||||
self.action.input_parameters = parameters
|
||||
instance_info = {
|
||||
'id': self.INSTANCE_UUID,
|
||||
'status': 'ACTIVE',
|
||||
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
|
||||
}
|
||||
instance = servers.Server(servers.ServerManager, info=instance_info)
|
||||
|
||||
compute_node_info = {
|
||||
'id': 'compute2-hostname',
|
||||
'status': 'enabled',
|
||||
'hypervisor_hostname': 'compute1-hostname',
|
||||
'service': {
|
||||
'host': 'compute1-hostname'
|
||||
}
|
||||
}
|
||||
compute_node = hypervisors.Hypervisor(
|
||||
hypervisors.HypervisorManager, info=compute_node_info)
|
||||
|
||||
self.m_helper.find_instance.return_value = instance
|
||||
self.m_helper.get_compute_node_by_hostname.return_value = compute_node
|
||||
|
||||
self.action.pre_condition()
|
||||
|
||||
def test_pre_condition_instance_not_found(self):
|
||||
"""Test pre_condition fails when instance doesn't exist"""
|
||||
self.m_helper.find_instance.side_effect = (
|
||||
nova_helper.nvexceptions.NotFound('404'))
|
||||
|
||||
self.assertRaisesRegex(
|
||||
exception.ActionSkipped,
|
||||
f"Instance {self.INSTANCE_UUID} not found",
|
||||
self.action.pre_condition)
|
||||
|
||||
def test_pre_condition_instance_on_wrong_host(self):
|
||||
"""Test pre_condition fails when instance is on wrong host"""
|
||||
instance_info = {
|
||||
'id': self.INSTANCE_UUID,
|
||||
'status': 'ACTIVE',
|
||||
'OS-EXT-SRV-ATTR:host': 'wrong-hostname'
|
||||
}
|
||||
instance = servers.Server(servers.ServerManager, info=instance_info)
|
||||
|
||||
self.m_helper.find_instance.return_value = instance
|
||||
|
||||
self.assertRaisesRegex(
|
||||
exception.ActionSkipped,
|
||||
f"Instance {self.INSTANCE_UUID} is not running on source node "
|
||||
r"compute1-hostname \(currently on wrong-hostname\)",
|
||||
self.action.pre_condition)
|
||||
|
||||
def test_pre_condition_destination_node_not_found(self):
|
||||
"""Test pre_condition fails when destination node doesn't exist"""
|
||||
instance_info = {
|
||||
'id': self.INSTANCE_UUID,
|
||||
'status': 'ACTIVE',
|
||||
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
|
||||
}
|
||||
instance = servers.Server(servers.ServerManager, info=instance_info)
|
||||
|
||||
self.m_helper.find_instance.return_value = instance
|
||||
self.m_helper.get_compute_node_by_hostname.side_effect = (
|
||||
exception.ComputeNodeNotFound(name='compute2-hostname'))
|
||||
|
||||
self.assertRaisesRegex(
|
||||
exception.ActionExecutionFailure,
|
||||
"Destination node compute2-hostname not found",
|
||||
self.action.pre_condition)
|
||||
|
||||
def test_pre_condition_destination_node_disabled(self):
|
||||
"""Test pre_condition fails when destination node is disabled"""
|
||||
instance_info = {
|
||||
'id': self.INSTANCE_UUID,
|
||||
'status': 'ACTIVE',
|
||||
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
|
||||
}
|
||||
instance = servers.Server(servers.ServerManager, info=instance_info)
|
||||
|
||||
compute_node_info = {
|
||||
'id': 'compute2-hostname',
|
||||
'status': 'disabled',
|
||||
'hypervisor_hostname': 'compute2-hostname',
|
||||
'service': {
|
||||
'host': 'compute2-hostname'
|
||||
}
|
||||
}
|
||||
compute_node = hypervisors.Hypervisor(
|
||||
hypervisors.HypervisorManager, info=compute_node_info)
|
||||
|
||||
self.m_helper.find_instance.return_value = instance
|
||||
self.m_helper.get_compute_node_by_hostname.return_value = compute_node
|
||||
|
||||
self.assertRaisesRegex(
|
||||
exception.ActionExecutionFailure,
|
||||
"Destination node compute2-hostname is not in enabled state",
|
||||
self.action.pre_condition)
|
||||
|
||||
def test_pre_condition_live_migration_wrong_status(self):
|
||||
"""Test pre_condition fails live migration with non-ACTIVE status"""
|
||||
instance_info = {
|
||||
'id': self.INSTANCE_UUID,
|
||||
'status': 'SHUTOFF',
|
||||
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
|
||||
}
|
||||
instance = servers.Server(servers.ServerManager, info=instance_info)
|
||||
|
||||
compute_node_info = {
|
||||
'id': 'compute2-hostname',
|
||||
'status': 'enabled',
|
||||
'hypervisor_hostname': 'compute2-hostname',
|
||||
'service': {
|
||||
'host': 'compute2-hostname'
|
||||
}
|
||||
}
|
||||
compute_node = hypervisors.Hypervisor(
|
||||
hypervisors.HypervisorManager, info=compute_node_info)
|
||||
|
||||
self.m_helper.find_instance.return_value = instance
|
||||
self.m_helper.get_compute_node_by_hostname.return_value = compute_node
|
||||
|
||||
self.assertRaisesRegex(
|
||||
exception.ActionExecutionFailure,
|
||||
f"Live migration requires instance {self.INSTANCE_UUID} to be in "
|
||||
r"ACTIVE status \(current status: SHUTOFF\)",
|
||||
self.action.pre_condition)
|
||||
|
||||
def test_pre_condition_no_destination_node(self):
|
||||
"""Test pre_condition with no destination node specified"""
|
||||
instance_info = {
|
||||
'id': self.INSTANCE_UUID,
|
||||
'status': 'ACTIVE',
|
||||
'OS-EXT-SRV-ATTR:host': 'compute1-hostname'
|
||||
}
|
||||
instance = servers.Server(servers.ServerManager, info=instance_info)
|
||||
|
||||
self.m_helper.find_instance.return_value = instance
|
||||
|
||||
# Create action without destination_node
|
||||
params = {
|
||||
"migration_type": "live",
|
||||
"source_node": "compute1-hostname",
|
||||
"destination_node": None,
|
||||
baction.BaseAction.RESOURCE_ID: self.INSTANCE_UUID,
|
||||
}
|
||||
action = migration.Migrate(mock.Mock())
|
||||
action.input_parameters = params
|
||||
|
||||
action.pre_condition()
|
||||
|
||||
# Ensure get_compute_node_by_hostname was not called
|
||||
self.m_helper.get_compute_node_by_hostname.assert_not_called()
|
||||
|
||||
def test_migration_post_condition(self):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user