Browse Source

Avoid error state for recovered instances after failed migrations

Most users expect that if a live migration fails but the instance
is fully recovered, it shouldn't enter 'error' state. Setting
the migration status to 'error' should be enough. This simplifies
debugging, making it clear that the instance dosn't have to be
manually recovered.

This patch changed this behavior, indirectly affecting the Hyper-V
driver: Idfdce9e7dd8106af01db0358ada15737cb846395

We'll stop propagating exceptions when managing to recover the
instance, which matches the reference driver (libvirt) behavior.

Change-Id: I4b667c06e008b03457d35ce9c920b916d21c255f
Closes-Bug: 1841411
(cherry picked from commit a721681e6111d85264e20085d1f184d1c923b69e)
changes/49/680349/1
Lucian Petrut 3 months ago
parent
commit
ba40656403
4 changed files with 29 additions and 31 deletions
  1. +11
    -9
      compute_hyperv/nova/cluster/livemigrationops.py
  2. +7
    -5
      compute_hyperv/nova/livemigrationops.py
  3. +1
    -3
      compute_hyperv/tests/unit/cluster/test_livemigrationops.py
  4. +10
    -14
      compute_hyperv/tests/unit/test_livemigrationops.py

+ 11
- 9
compute_hyperv/nova/cluster/livemigrationops.py View File

@@ -20,7 +20,6 @@ from nova import exception
from os_win import constants as os_win_const
from os_win import utilsfactory
from oslo_log import log as logging
from oslo_utils import excutils

from compute_hyperv.i18n import _
import compute_hyperv.nova.conf
@@ -72,14 +71,17 @@ class ClusterLiveMigrationOps(livemigrationops.LiveMigrationOps):
dest,
CONF.hyperv.instance_live_migration_timeout)
except Exception:
with excutils.save_and_reraise_exception():
self._check_failed_instance_migration(
instance_ref,
expected_state=os_win_const.CLUSTER_GROUP_ONLINE)

LOG.debug("Calling live migration recover_method "
"for instance.", instance=instance_ref)
recover_method(context, instance_ref, dest, migrate_data)
LOG.exception("Live migration failed. Attempting rollback.",
instance=instance_ref)
# The recover method will update the migration state.
# We won't error out if we manage to recover the instance,
# which would otherwise end up in error state.
self._check_failed_instance_migration(
instance_ref,
expected_state=os_win_const.CLUSTER_GROUP_ONLINE)

recover_method(context, instance_ref, dest, migrate_data)
return

LOG.debug("Calling live migration post_method for instance.",
instance=instance_ref)

+ 7
- 5
compute_hyperv/nova/livemigrationops.py View File

@@ -21,7 +21,6 @@ from nova import exception
from nova.objects import migrate_data as migrate_data_obj
from os_win import utilsfactory
from oslo_log import log as logging
from oslo_utils import excutils

from compute_hyperv.i18n import _
from compute_hyperv.nova import block_device_manager
@@ -77,10 +76,13 @@ class LiveMigrationOps(object):
dest,
migrate_disks=not shared_storage)
except Exception:
with excutils.save_and_reraise_exception():
LOG.debug("Calling live migration recover_method "
"for instance: %s", instance_name)
recover_method(context, instance_ref, dest, migrate_data)
# The recover method will update the migration state.
# We won't error out if we manage to recover the instance,
# which would otherwise end up in error state.
LOG.exception("Live migration failed. Attempting rollback.",
instance=instance_ref)
recover_method(context, instance_ref, dest, migrate_data)
return

LOG.debug("Calling live migration post_method for instance: %s",
instance_name)

+ 1
- 3
compute_hyperv/tests/unit/cluster/test_livemigrationops.py View File

@@ -86,9 +86,7 @@ class ClusterLiveMigrationOpsTestCase(test_base.HyperVBaseTestCase):
clustutils = self.livemigrops._clustutils
clustutils.live_migrate_vm.side_effect = nova_test.TestingException

self.assertRaises(
nova_test.TestingException,
self.livemigrops.live_migration,
self.livemigrops.live_migration(
self._fake_context, mock_instance, dest, mock.sentinel.post_method,
recover_method,
block_migration=mock.sentinel.block_migration,

+ 10
- 14
compute_hyperv/tests/unit/test_livemigrationops.py View File

@@ -73,25 +73,21 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase):
else:
migrate_data = None

self._livemigrops.live_migration(context=self.context,
instance_ref=mock_instance,
dest=fake_dest,
post_method=mock_post,
recover_method=mock_recover,
block_migration=(
mock.sentinel.block_migr),
migrate_data=migrate_data)

if side_effect is os_win_exc.HyperVException:
self.assertRaises(os_win_exc.HyperVException,
self._livemigrops.live_migration,
self.context, mock_instance, fake_dest,
mock_post, mock_recover,
mock.sentinel.block_migr,
migrate_data)
mock_recover.assert_called_once_with(self.context, mock_instance,
fake_dest,
migrate_data)
mock_post.assert_not_called()
else:
self._livemigrops.live_migration(context=self.context,
instance_ref=mock_instance,
dest=fake_dest,
post_method=mock_post,
recover_method=mock_recover,
block_migration=(
mock.sentinel.block_migr),
migrate_data=migrate_data)
post_call_args = mock_post.call_args_list
self.assertEqual(1, len(post_call_args))


Loading…
Cancel
Save