Merge "libvirt: check job status for VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED event" into stable/stein

This commit is contained in:
Zuul 2020-01-22 18:14:07 +00:00 committed by Gerrit Code Review
commit 377ae223c7
5 changed files with 88 additions and 31 deletions

View File

@ -73,6 +73,7 @@ VIR_DOMAIN_EVENT_STOPPED = 5
VIR_DOMAIN_EVENT_SHUTDOWN = 6 VIR_DOMAIN_EVENT_SHUTDOWN = 6
VIR_DOMAIN_EVENT_PMSUSPENDED = 7 VIR_DOMAIN_EVENT_PMSUSPENDED = 7
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED = 1
VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY = 7 VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY = 7
VIR_DOMAIN_UNDEFINE_MANAGED_SAVE = 1 VIR_DOMAIN_UNDEFINE_MANAGED_SAVE = 1

View File

@ -210,7 +210,8 @@ class HostTestCase(test.NoDBTestCase):
self.assertEqual(event.EVENT_LIFECYCLE_POSTCOPY_STARTED, self.assertEqual(event.EVENT_LIFECYCLE_POSTCOPY_STARTED,
expected_event.transition) expected_event.transition)
def test_event_lifecycle_callback_suspended_migrated(self): @mock.patch('nova.virt.libvirt.guest.Guest.get_job_info')
def test_event_lifecycle_callback_suspended_migrated(self, get_job_info):
"""Tests the suspended lifecycle event with libvirt with migrated""" """Tests the suspended lifecycle event with libvirt with migrated"""
hostimpl = mock.MagicMock() hostimpl = mock.MagicMock()
conn = mock.MagicMock() conn = mock.MagicMock()
@ -220,22 +221,47 @@ class HostTestCase(test.NoDBTestCase):
</domain> </domain>
""" """
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True) dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
# See https://libvirt.org/html/libvirt-libvirt-domain.html for values. jobinfo = libvirt_guest.JobInfo(
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED = 1 type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED)
with mock.patch.object(host.libvirt, get_job_info.return_value = jobinfo
'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED', new=1, host.Host._event_lifecycle_callback(
create=True): conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
host.Host._event_lifecycle_callback( detail=fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED,
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED, opaque=hostimpl)
detail=VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED, opaque=hostimpl) expected_event = hostimpl._queue_event.call_args[0][0]
self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED,
expected_event.transition)
get_job_info.assert_called_once_with()
@mock.patch('nova.virt.libvirt.guest.Guest.get_job_info')
@mock.patch('nova.virt.libvirt.migration.find_job_type')
def test_event_lifecycle_callback_suspended_migrated_job_failed(
self, find_job_type, get_job_info):
"""Tests the suspended lifecycle event with libvirt with migrated"""
hostimpl = mock.MagicMock()
conn = mock.MagicMock()
fake_dom_xml = """
<domain type='kvm'>
<uuid>cef19ce0-0ca2-11df-855d-b19fbce37686</uuid>
</domain>
"""
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
jobinfo = libvirt_guest.JobInfo(type=fakelibvirt.VIR_DOMAIN_JOB_NONE)
get_job_info.return_value = jobinfo
# If the job type is VIR_DOMAIN_JOB_NONE we'll attempt to figure out
# the actual job status, so in this case we mock it to be a failure.
find_job_type.return_value = fakelibvirt.VIR_DOMAIN_JOB_FAILED
host.Host._event_lifecycle_callback(
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
detail=fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED,
opaque=hostimpl)
expected_event = hostimpl._queue_event.call_args[0][0] expected_event = hostimpl._queue_event.call_args[0][0]
# FIXME(mriedem): This should be EVENT_LIFECYCLE_MIGRATION_COMPLETED
# once bug 1788014 is fixed and we properly check job status for the
# VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED case.
# self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED,
# expected_event.transition)
self.assertEqual(event.EVENT_LIFECYCLE_PAUSED, self.assertEqual(event.EVENT_LIFECYCLE_PAUSED,
expected_event.transition) expected_event.transition)
get_job_info.assert_called_once_with()
find_job_type.assert_called_once_with(
test.MatchType(libvirt_guest.Guest), instance=None,
logging_ok=False)
def test_event_emit_delayed_call_delayed(self): def test_event_emit_delayed_call_delayed(self):
ev = event.LifecycleEvent( ev = event.LifecycleEvent(

View File

@ -973,6 +973,14 @@ class MigrationMonitorTestCase(test.NoDBTestCase):
self.assertEqual(migration.find_job_type(self.guest, self.instance), self.assertEqual(migration.find_job_type(self.guest, self.instance),
fakelibvirt.VIR_DOMAIN_JOB_FAILED) fakelibvirt.VIR_DOMAIN_JOB_FAILED)
@mock.patch('nova.virt.libvirt.migration.LOG',
            new_callable=mock.NonCallableMock)
@mock.patch('nova.virt.libvirt.guest.Guest.is_active', return_value=True)
def test_live_migration_find_type_no_logging(self, mock_active, _mock_log):
    """find_job_type must not log anything when logging_ok=False.

    Guest.is_active is patched to return True and the expected job type
    for that case is VIR_DOMAIN_JOB_FAILED.
    """
    self.assertEqual(fakelibvirt.VIR_DOMAIN_JOB_FAILED,
                     migration.find_job_type(self.guest, self.instance,
                                             logging_ok=False))
    # Patching LOG with NonCallableMock only prevents calling LOG itself;
    # its child attributes (LOG.debug, LOG.info, ...) are regular callable
    # mocks, so a stray log call would not raise. Calls made on children
    # are recorded on the parent, so verify explicitly that none happened.
    self.assertEqual([], _mock_log.mock_calls)
def test_live_migration_abort_too_long(self): def test_live_migration_abort_too_long(self):
# Elapsed time is over completion timeout # Elapsed time is over completion timeout
self.assertTrue(migration.should_trigger_timeout_action( self.assertTrue(migration.should_trigger_timeout_action(

View File

@ -56,6 +56,7 @@ from nova import utils
from nova.virt import event as virtevent from nova.virt import event as virtevent
from nova.virt.libvirt import config as vconfig from nova.virt.libvirt import config as vconfig
from nova.virt.libvirt import guest as libvirt_guest from nova.virt.libvirt import guest as libvirt_guest
from nova.virt.libvirt import migration as libvirt_migrate
libvirt = None libvirt = None
@ -174,12 +175,27 @@ class Host(object):
elif event == libvirt.VIR_DOMAIN_EVENT_SUSPENDED: elif event == libvirt.VIR_DOMAIN_EVENT_SUSPENDED:
if detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY: if detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY:
transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED
# FIXME(mriedem): VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED is also sent elif detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED:
# when live migration of the guest fails, so we cannot simply rely # VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED is also sent when live
# on the event itself but need to check if the job itself was # migration of the guest fails, so we cannot simply rely
# successful. # on the event itself but need to check if the job itself was
# elif detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED: # successful.
# transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED # NOTE(mriedem): The job check logic here is copied from
# LibvirtDriver._live_migration_monitor.
guest = libvirt_guest.Guest(dom)
info = guest.get_job_info()
if info.type == libvirt.VIR_DOMAIN_JOB_NONE:
# Either still running, or failed or completed,
# let's untangle the mess.
info.type = libvirt_migrate.find_job_type(
guest, instance=None, logging_ok=False)
if info.type == libvirt.VIR_DOMAIN_JOB_COMPLETED:
transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
else:
# Failed or some other status we don't know about, so just
# opt to report the guest is paused.
transition = virtevent.EVENT_LIFECYCLE_PAUSED
else: else:
transition = virtevent.EVENT_LIFECYCLE_PAUSED transition = virtevent.EVENT_LIFECYCLE_PAUSED
elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED: elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED:

View File

@ -340,11 +340,13 @@ def _update_vif_xml(xml_doc, migrate_data, get_vif_config):
return xml_doc return xml_doc
def find_job_type(guest, instance): def find_job_type(guest, instance, logging_ok=True):
"""Determine the (likely) current migration job type """Determine the (likely) current migration job type
:param guest: a nova.virt.libvirt.guest.Guest :param guest: a nova.virt.libvirt.guest.Guest
:param instance: a nova.objects.Instance :param instance: a nova.objects.Instance
:param logging_ok: Whether this method may emit log messages. Pass False when
called from a native thread, where logging is generally prohibited.
Annoyingly when job type == NONE and migration is Annoyingly when job type == NONE and migration is
no longer running, we don't know whether we stopped no longer running, we don't know whether we stopped
@ -354,25 +356,29 @@ def find_job_type(guest, instance):
:returns: a libvirt job type constant :returns: a libvirt job type constant
""" """
def _log(func, msg, *args, **kwargs):
if logging_ok:
func(msg, *args, **kwargs)
try: try:
if guest.is_active(): if guest.is_active():
LOG.debug("VM running on src, migration failed", _log(LOG.debug, "VM running on src, migration failed",
instance=instance) instance=instance)
return libvirt.VIR_DOMAIN_JOB_FAILED return libvirt.VIR_DOMAIN_JOB_FAILED
else: else:
LOG.debug("VM is shutoff, migration finished", _log(LOG.debug, "VM is shutoff, migration finished",
instance=instance) instance=instance)
return libvirt.VIR_DOMAIN_JOB_COMPLETED return libvirt.VIR_DOMAIN_JOB_COMPLETED
except libvirt.libvirtError as ex: except libvirt.libvirtError as ex:
LOG.debug("Error checking domain status %(ex)s", _log(LOG.debug, "Error checking domain status %(ex)s", {"ex": ex},
{"ex": ex}, instance=instance) instance=instance)
if ex.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN: if ex.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
LOG.debug("VM is missing, migration finished", _log(LOG.debug, "VM is missing, migration finished",
instance=instance) instance=instance)
return libvirt.VIR_DOMAIN_JOB_COMPLETED return libvirt.VIR_DOMAIN_JOB_COMPLETED
else: else:
LOG.info("Error %(ex)s, migration failed", _log(LOG.info, "Error %(ex)s, migration failed", {"ex": ex},
{"ex": ex}, instance=instance) instance=instance)
return libvirt.VIR_DOMAIN_JOB_FAILED return libvirt.VIR_DOMAIN_JOB_FAILED