Merge "libvirt: check job status for VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED event" into stable/stein

This commit is contained in:
Zuul 2020-01-22 18:14:07 +00:00 committed by Gerrit Code Review
commit 377ae223c7
5 changed files with 88 additions and 31 deletions

View File

@ -73,6 +73,7 @@ VIR_DOMAIN_EVENT_STOPPED = 5
VIR_DOMAIN_EVENT_SHUTDOWN = 6
VIR_DOMAIN_EVENT_PMSUSPENDED = 7
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED = 1
VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY = 7
VIR_DOMAIN_UNDEFINE_MANAGED_SAVE = 1

View File

@ -210,7 +210,8 @@ class HostTestCase(test.NoDBTestCase):
self.assertEqual(event.EVENT_LIFECYCLE_POSTCOPY_STARTED,
expected_event.transition)
def test_event_lifecycle_callback_suspended_migrated(self):
@mock.patch('nova.virt.libvirt.guest.Guest.get_job_info')
def test_event_lifecycle_callback_suspended_migrated(self, get_job_info):
"""Tests the suspended lifecycle event with libvirt with migrated"""
hostimpl = mock.MagicMock()
conn = mock.MagicMock()
@ -220,22 +221,47 @@ class HostTestCase(test.NoDBTestCase):
</domain>
"""
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
# See https://libvirt.org/html/libvirt-libvirt-domain.html for values.
VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED = 1
with mock.patch.object(host.libvirt,
'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED', new=1,
create=True):
host.Host._event_lifecycle_callback(
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
detail=VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED, opaque=hostimpl)
jobinfo = libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED)
get_job_info.return_value = jobinfo
host.Host._event_lifecycle_callback(
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
detail=fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED,
opaque=hostimpl)
expected_event = hostimpl._queue_event.call_args[0][0]
self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED,
expected_event.transition)
get_job_info.assert_called_once_with()
@mock.patch('nova.virt.libvirt.guest.Guest.get_job_info')
@mock.patch('nova.virt.libvirt.migration.find_job_type')
def test_event_lifecycle_callback_suspended_migrated_job_failed(
self, find_job_type, get_job_info):
"""Tests the suspended lifecycle event with libvirt with migrated"""
hostimpl = mock.MagicMock()
conn = mock.MagicMock()
fake_dom_xml = """
<domain type='kvm'>
<uuid>cef19ce0-0ca2-11df-855d-b19fbce37686</uuid>
</domain>
"""
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
jobinfo = libvirt_guest.JobInfo(type=fakelibvirt.VIR_DOMAIN_JOB_NONE)
get_job_info.return_value = jobinfo
# If the job type is VIR_DOMAIN_JOB_NONE we'll attempt to figure out
# the actual job status, so in this case we mock it to be a failure.
find_job_type.return_value = fakelibvirt.VIR_DOMAIN_JOB_FAILED
host.Host._event_lifecycle_callback(
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
detail=fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED,
opaque=hostimpl)
expected_event = hostimpl._queue_event.call_args[0][0]
# FIXME(mriedem): This should be EVENT_LIFECYCLE_MIGRATION_COMPLETED
# once bug 1788014 is fixed and we properly check job status for the
# VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED case.
# self.assertEqual(event.EVENT_LIFECYCLE_MIGRATION_COMPLETED,
# expected_event.transition)
self.assertEqual(event.EVENT_LIFECYCLE_PAUSED,
expected_event.transition)
get_job_info.assert_called_once_with()
find_job_type.assert_called_once_with(
test.MatchType(libvirt_guest.Guest), instance=None,
logging_ok=False)
def test_event_emit_delayed_call_delayed(self):
ev = event.LifecycleEvent(

View File

@ -973,6 +973,14 @@ class MigrationMonitorTestCase(test.NoDBTestCase):
self.assertEqual(migration.find_job_type(self.guest, self.instance),
fakelibvirt.VIR_DOMAIN_JOB_FAILED)
@mock.patch('nova.virt.libvirt.migration.LOG',
new_callable=mock.NonCallableMock) # asserts not called
@mock.patch('nova.virt.libvirt.guest.Guest.is_active', return_value=True)
def test_live_migration_find_type_no_logging(self, mock_active, _mock_log):
self.assertEqual(fakelibvirt.VIR_DOMAIN_JOB_FAILED,
migration.find_job_type(self.guest, self.instance,
logging_ok=False))
def test_live_migration_abort_too_long(self):
# Elapsed time is over completion timeout
self.assertTrue(migration.should_trigger_timeout_action(

View File

@ -56,6 +56,7 @@ from nova import utils
from nova.virt import event as virtevent
from nova.virt.libvirt import config as vconfig
from nova.virt.libvirt import guest as libvirt_guest
from nova.virt.libvirt import migration as libvirt_migrate
libvirt = None
@ -174,12 +175,27 @@ class Host(object):
elif event == libvirt.VIR_DOMAIN_EVENT_SUSPENDED:
if detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY:
transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED
# FIXME(mriedem): VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED is also sent
# when live migration of the guest fails, so we cannot simply rely
# on the event itself but need to check if the job itself was
# successful.
# elif detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED:
# transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
elif detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED:
# VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED is also sent when live
# migration of the guest fails, so we cannot simply rely
# on the event itself but need to check if the job itself was
# successful.
# NOTE(mriedem): The job check logic here is copied from
# LibvirtDriver._live_migration_monitor.
guest = libvirt_guest.Guest(dom)
info = guest.get_job_info()
if info.type == libvirt.VIR_DOMAIN_JOB_NONE:
# Either still running, or failed or completed,
# lets untangle the mess.
info.type = libvirt_migrate.find_job_type(
guest, instance=None, logging_ok=False)
if info.type == libvirt.VIR_DOMAIN_JOB_COMPLETED:
transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
else:
# Failed or some other status we don't know about, so just
# opt to report the guest is paused.
transition = virtevent.EVENT_LIFECYCLE_PAUSED
else:
transition = virtevent.EVENT_LIFECYCLE_PAUSED
elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED:

View File

@ -340,11 +340,13 @@ def _update_vif_xml(xml_doc, migrate_data, get_vif_config):
return xml_doc
def find_job_type(guest, instance):
def find_job_type(guest, instance, logging_ok=True):
"""Determine the (likely) current migration job type
:param guest: a nova.virt.libvirt.guest.Guest
:param instance: a nova.objects.Instance
:param logging_ok: If logging in this method is OK. If called from a
native thread then logging is generally prohibited.
Annoyingly when job type == NONE and migration is
no longer running, we don't know whether we stopped
@ -354,25 +356,29 @@ def find_job_type(guest, instance):
:returns: a libvirt job type constant
"""
def _log(func, msg, *args, **kwargs):
if logging_ok:
func(msg, *args, **kwargs)
try:
if guest.is_active():
LOG.debug("VM running on src, migration failed",
instance=instance)
_log(LOG.debug, "VM running on src, migration failed",
instance=instance)
return libvirt.VIR_DOMAIN_JOB_FAILED
else:
LOG.debug("VM is shutoff, migration finished",
instance=instance)
_log(LOG.debug, "VM is shutoff, migration finished",
instance=instance)
return libvirt.VIR_DOMAIN_JOB_COMPLETED
except libvirt.libvirtError as ex:
LOG.debug("Error checking domain status %(ex)s",
{"ex": ex}, instance=instance)
_log(LOG.debug, "Error checking domain status %(ex)s", {"ex": ex},
instance=instance)
if ex.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
LOG.debug("VM is missing, migration finished",
instance=instance)
_log(LOG.debug, "VM is missing, migration finished",
instance=instance)
return libvirt.VIR_DOMAIN_JOB_COMPLETED
else:
LOG.info("Error %(ex)s, migration failed",
{"ex": ex}, instance=instance)
_log(LOG.info, "Error %(ex)s, migration failed", {"ex": ex},
instance=instance)
return libvirt.VIR_DOMAIN_JOB_FAILED