diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index e3a3c58d9f7f..f95081db7bed 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -8214,6 +8214,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): EXPECT_FAILURE = 2 EXPECT_ABORT = 3 + @mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy") @mock.patch.object(time, "time") @mock.patch.object(time, "sleep", side_effect=lambda x: eventlet.sleep(0)) @@ -8237,9 +8238,12 @@ class LibvirtConnTestCase(test.NoDBTestCase): mock_conn, mock_sleep, mock_time, + mock_postcopy_switch, + current_mig_status=None, expected_mig_status=None, scheduled_action=None, - scheduled_action_executed=False): + scheduled_action_executed=False, + block_migration=False): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) instance = objects.Instance(**self.test_instance) drvr.active_migrations[instance.uuid] = deque() @@ -8258,7 +8262,8 @@ class LibvirtConnTestCase(test.NoDBTestCase): elif rec == "domain-stop": dom.destroy() elif rec == "force_complete": - drvr.active_migrations[instance.uuid].append("pause") + drvr.active_migrations[instance.uuid].append( + "force-complete") else: if len(time_records) > 0: time_records.pop(0) @@ -8279,7 +8284,11 @@ class LibvirtConnTestCase(test.NoDBTestCase): dest = mock.sentinel.migrate_dest migration = objects.Migration(context=self.context, id=1) migrate_data = objects.LibvirtLiveMigrateData( - migration=migration) + migration=migration, block_migration=block_migration) + + if current_mig_status: + migrate_data.migration.status = current_mig_status + migrate_data.migration.save() fake_post_method = mock.MagicMock() fake_recover_method = mock.MagicMock() @@ -8294,9 +8303,13 @@ class LibvirtConnTestCase(test.NoDBTestCase): if scheduled_action_executed: if scheduled_action == 'pause': self.assertTrue(mock_pause.called) + if scheduled_action == 'postcopy_switch': + 
self.assertTrue(mock_postcopy_switch.called) else: if scheduled_action == 'pause': self.assertFalse(mock_pause.called) + if scheduled_action == 'postcopy_switch': + self.assertFalse(mock_postcopy_switch.called) mock_mig_save.assert_called_with() if expect_result == self.EXPECT_SUCCESS: @@ -8365,6 +8378,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): self._test_live_migration_monitoring(domain_info_records, [], self.EXPECT_SUCCESS, + current_mig_status="running", scheduled_action="pause", scheduled_action_executed=True) @@ -8390,6 +8404,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): self._test_live_migration_monitoring(domain_info_records, [], self.EXPECT_SUCCESS, + current_mig_status="preparing", scheduled_action="pause", scheduled_action_executed=True) @@ -8415,6 +8430,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): self._test_live_migration_monitoring(domain_info_records, [], self.EXPECT_SUCCESS, + current_mig_status="completed", scheduled_action="pause", scheduled_action_executed=False) @@ -8439,6 +8455,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): self._test_live_migration_monitoring(domain_info_records, [], self.EXPECT_FAILURE, + current_mig_status="cancelled", expected_mig_status='cancelled', scheduled_action="pause", scheduled_action_executed=False) @@ -8467,6 +8484,211 @@ class LibvirtConnTestCase(test.NoDBTestCase): scheduled_action="pause", scheduled_action_executed=False) + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_postcopy_normal(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and postcopy + # switch scheduled in between VIR_DOMAIN_JOB_UNBOUNDED + mock_postcopy_enabled.return_value = True + domain_info_records = [ + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + 
"force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_SUCCESS, + current_mig_status="running", + scheduled_action="postcopy_switch", + scheduled_action_executed=True) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_postcopy_on_start(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and postcopy + # switch scheduled in case of job type VIR_DOMAIN_JOB_NONE and + # finish_event is not ready yet + mock_postcopy_enabled.return_value = True + domain_info_records = [ + "force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_SUCCESS, + current_mig_status="preparing", + scheduled_action="postcopy_switch", + scheduled_action_executed=True) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_postcopy_on_finish(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and postcopy + # switch scheduled in case of job type VIR_DOMAIN_JOB_NONE and + # finish_event is ready + mock_postcopy_enabled.return_value = True + domain_info_records = [ + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + 
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + "force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_SUCCESS, + current_mig_status="completed", + scheduled_action="postcopy_switch", + scheduled_action_executed=False) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_postcopy_on_cancel(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and postcopy + # scheduled in case of job type VIR_DOMAIN_JOB_CANCELLED + mock_postcopy_enabled.return_value = True + domain_info_records = [ + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + "force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_CANCELLED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_FAILURE, + current_mig_status="cancelled", + expected_mig_status='cancelled', + scheduled_action="postcopy_switch", + scheduled_action_executed=False) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_pause_on_postcopy(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and pause + # scheduled after migration switched to postcopy + mock_postcopy_enabled.return_value = True + domain_info_records = [ + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + 
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_SUCCESS, + current_mig_status="running (post-copy)", + scheduled_action="pause", + scheduled_action_executed=False) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_postcopy_on_postcopy(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and postcopy + # switch scheduled after migration already switched to postcopy + mock_postcopy_enabled.return_value = True + domain_info_records = [ + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_SUCCESS, + current_mig_status="running (post-copy)", + scheduled_action="postcopy_switch", + scheduled_action_executed=False) + + @mock.patch.object(libvirt_driver.LibvirtDriver, + "_is_post_copy_enabled") + def test_live_migration_handle_postcopy_on_failure(self, + mock_postcopy_enabled): + # A normal sequence where see all the normal job states, and postcopy + # scheduled in case of job type VIR_DOMAIN_JOB_FAILED + mock_postcopy_enabled.return_value = True + domain_info_records = [ + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_NONE), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), +
libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED), + "thread-finish", + "domain-stop", + "force_complete", + libvirt_guest.JobInfo( + type=fakelibvirt.VIR_DOMAIN_JOB_FAILED), + ] + + self._test_live_migration_monitoring(domain_info_records, [], + self.EXPECT_FAILURE, + scheduled_action="postcopy_switch", + scheduled_action_executed=False) + def test_live_migration_monitor_success_race(self): # A normalish sequence but we're too slow to see the # completed job state @@ -14134,7 +14356,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): drvr.active_migrations[instance.uuid] = deque() drvr.live_migration_force_complete(instance) self.assertEqual( - 1, drvr.active_migrations[instance.uuid].count("pause")) + 1, drvr.active_migrations[instance.uuid].count("force-complete")) @mock.patch.object(host.Host, "get_connection") @mock.patch.object(fakelibvirt.virDomain, "abortJob") diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py index 4f455134d8c3..a14070e03649 100644 --- a/nova/tests/unit/virt/libvirt/test_migration.py +++ b/nova/tests/unit/virt/libvirt/test_migration.py @@ -429,54 +429,117 @@ class MigrationMonitorTestCase(test.NoDBTestCase): mock_msave.assert_called_once_with() mock_isave.assert_called_once_with() + @mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy") @mock.patch.object(libvirt_guest.Guest, "pause") - def test_live_migration_run_tasks_pause(self, mock_pause): - tasks = deque() - tasks.append("pause") - active_migrations = {self.instance.uuid: tasks} - on_migration_failure = deque() - - migration.run_tasks(self.guest, self.instance, - active_migrations, on_migration_failure) - - mock_pause.assert_called_once_with() - self.assertEqual(len(on_migration_failure), 1) - self.assertEqual(on_migration_failure.pop(), "unpause") - - @mock.patch.object(libvirt_guest.Guest, "pause") - def test_live_migration_run_tasks_empty_tasks(self, mock_pause): + def 
test_live_migration_run_tasks_empty_tasks(self, mock_pause, + mock_postcopy): tasks = deque() active_migrations = {self.instance.uuid: tasks} on_migration_failure = deque() + mig = objects.Migration(id=1, status="running") + migration.run_tasks(self.guest, self.instance, - active_migrations, on_migration_failure) + active_migrations, on_migration_failure, + mig, False) self.assertFalse(mock_pause.called) + self.assertFalse(mock_postcopy.called) self.assertEqual(len(on_migration_failure), 0) + @mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy") @mock.patch.object(libvirt_guest.Guest, "pause") - def test_live_migration_run_tasks_no_tasks(self, mock_pause): + def test_live_migration_run_tasks_no_tasks(self, mock_pause, + mock_postcopy): active_migrations = {} on_migration_failure = deque() + mig = objects.Migration(id=1, status="running") + migration.run_tasks(self.guest, self.instance, - active_migrations, on_migration_failure) + active_migrations, on_migration_failure, + mig, False) self.assertFalse(mock_pause.called) + self.assertFalse(mock_postcopy.called) self.assertEqual(len(on_migration_failure), 0) + @mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy") @mock.patch.object(libvirt_guest.Guest, "pause") - def test_live_migration_run_tasks_no_pause(self, mock_pause): + def test_live_migration_run_tasks_no_force_complete(self, mock_pause, + mock_postcopy): tasks = deque() # Test to ensure unknown tasks are ignored tasks.append("wibble") active_migrations = {self.instance.uuid: tasks} on_migration_failure = deque() - migration.run_tasks(self.guest, self.instance, - active_migrations, on_migration_failure) + mig = objects.Migration(id=1, status="running") + migration.run_tasks(self.guest, self.instance, + active_migrations, on_migration_failure, + mig, False) + + self.assertFalse(mock_pause.called) + self.assertFalse(mock_postcopy.called) + self.assertEqual(len(on_migration_failure), 0) + + @mock.patch.object(libvirt_guest.Guest, 
"migrate_start_postcopy") + @mock.patch.object(libvirt_guest.Guest, "pause") + def test_live_migration_run_tasks_force_complete(self, mock_pause, + mock_postcopy): + tasks = deque() + tasks.append("force-complete") + active_migrations = {self.instance.uuid: tasks} + on_migration_failure = deque() + + mig = objects.Migration(id=1, status="running") + + migration.run_tasks(self.guest, self.instance, + active_migrations, on_migration_failure, + mig, False) + + mock_pause.assert_called_once_with() + self.assertFalse(mock_postcopy.called) + self.assertEqual(len(on_migration_failure), 1) + self.assertEqual(on_migration_failure.pop(), "unpause") + + @mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy") + @mock.patch.object(libvirt_guest.Guest, "pause") + def test_live_migration_run_tasks_force_complete_postcopy_running(self, + mock_pause, mock_postcopy): + tasks = deque() + tasks.append("force-complete") + active_migrations = {self.instance.uuid: tasks} + on_migration_failure = deque() + + mig = objects.Migration(id=1, status="running (post-copy)") + + migration.run_tasks(self.guest, self.instance, + active_migrations, on_migration_failure, + mig, True) + + self.assertFalse(mock_pause.called) + self.assertFalse(mock_postcopy.called) + self.assertEqual(len(on_migration_failure), 0) + + @mock.patch.object(objects.Migration, "save") + @mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy") + @mock.patch.object(libvirt_guest.Guest, "pause") + def test_live_migration_run_tasks_force_complete_postcopy(self, + mock_pause, mock_postcopy, mock_msave): + tasks = deque() + tasks.append("force-complete") + active_migrations = {self.instance.uuid: tasks} + on_migration_failure = deque() + + mig = objects.Migration(id=1, status="running") + + migration.run_tasks(self.guest, self.instance, + active_migrations, on_migration_failure, + mig, True) + + mock_postcopy.assert_called_once_with() self.assertFalse(mock_pause.called) 
self.assertEqual(len(on_migration_failure), 0) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 99c2cddf0a1f..8b7c34877794 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -6069,6 +6069,11 @@ class LibvirtDriver(driver.ComputeDriver): return ram_gb + disk_gb + def _get_migration_flags(self, is_block_migration): + if is_block_migration: + return self._block_migration_flags + return self._live_migration_flags + def _live_migration_monitor(self, context, instance, guest, dest, post_method, recover_method, block_migration, @@ -6083,10 +6088,14 @@ class LibvirtDriver(driver.ComputeDriver): migration = migrate_data.migration curdowntime = None + migration_flags = self._get_migration_flags( + migrate_data.block_migration) + n = 0 start = time.time() progress_time = start progress_watermark = None + is_post_copy_enabled = self._is_post_copy_enabled(migration_flags) while True: info = guest.get_job_info() @@ -6113,7 +6122,9 @@ class LibvirtDriver(driver.ComputeDriver): # the operation, change max bandwidth libvirt_migrate.run_tasks(guest, instance, self.active_migrations, - on_migration_failure) + on_migration_failure, + migration, + is_post_copy_enabled) now = time.time() elapsed = now - start @@ -6290,12 +6301,15 @@ class LibvirtDriver(driver.ComputeDriver): LOG.debug("Live migration monitoring is all done", instance=instance) + def _is_post_copy_enabled(self, migration_flags): + if self._is_post_copy_available(): + if (migration_flags & libvirt.VIR_MIGRATE_POSTCOPY) != 0: + return True + return False + def live_migration_force_complete(self, instance): - # NOTE(pkoniszewski): currently only pause during live migration is - # supported to force live migration to complete, so just try to pause - # the instance try: - self.active_migrations[instance.uuid].append('pause') + self.active_migrations[instance.uuid].append('force-complete') except KeyError: raise exception.NoActiveMigrationForInstance( 
instance_id=instance.uuid) diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py index e539a0c00233..ae8d43ef0219 100644 --- a/nova/virt/libvirt/guest.py +++ b/nova/virt/libvirt/guest.py @@ -539,6 +539,10 @@ class Guest(object): """ self._domain.migrateSetMaxDowntime(mstime) + def migrate_start_postcopy(self): + """Switch running live migration to post-copy mode""" + self._domain.migrateStartPostCopy() + def get_job_info(self): """Get job info for the domain diff --git a/nova/virt/libvirt/migration.py b/nova/virt/libvirt/migration.py index 170f85e0b8fd..381b73dca02d 100644 --- a/nova/virt/libvirt/migration.py +++ b/nova/virt/libvirt/migration.py @@ -283,34 +283,62 @@ def save_stats(instance, migration, info, remaining): instance.save() -def run_tasks(guest, instance, active_migrations, on_migration_failure): +def trigger_postcopy_switch(guest, instance, migration): + try: + guest.migrate_start_postcopy() + except libvirt.libvirtError as e: + LOG.warning(_LW("Failed to switch to post-copy live " + "migration: %s"), + e, instance=instance) + else: + # NOTE(ltomas): Change the migration status to indicate that + # it is in post-copy active mode, i.e., the VM at + # destination is the active one + LOG.info(_LI("Switching to post-copy migration mode"), + instance=instance) + migration.status = 'running (post-copy)' + migration.save() + + +def run_tasks(guest, instance, active_migrations, on_migration_failure, + migration, is_post_copy_enabled): """Run any pending migration tasks :param guest: a nova.virt.libvirt.guest.Guest :param instance: a nova.objects.Instance :param active_migrations: dict of active migrations :param on_migration_failure: queue of recovery tasks + :param migration: a nova.objects.Migration + :param is_post_copy_enabled: True if post-copy can be used Run any pending migration tasks queued against the provided instance object. The active migrations dict should use instance UUIDs for keys and a queue of tasks as the values. 
- Currently the only valid task that can be requested - is "pause". Other tasks will be ignored + Currently the only valid task that can be requested + is "force-complete". Any other task is logged as + unknown and ignored. """ tasks = active_migrations.get(instance.uuid, deque()) while tasks: task = tasks.popleft() - if task == 'pause': - try: - guest.pause() - on_migration_failure.append("unpause") - except Exception as e: - LOG.warning(_LW("Failed to pause instance during " - "live-migration %s"), - e, instance=instance) + if task == 'force-complete': + if migration.status == 'running (post-copy)': + LOG.warning(_LW("Live-migration %s already switched " + "to post-copy mode."), + instance.uuid, instance=instance) + elif is_post_copy_enabled: + trigger_postcopy_switch(guest, instance, migration) + else: + try: + guest.pause() + on_migration_failure.append("unpause") + except Exception as e: + LOG.warning(_LW("Failed to pause instance during " + "live-migration %s"), + e, instance=instance) else: LOG.warning(_LW("Unknown migration task '%(task)s'"), {"task": task}, instance=instance) diff --git a/releasenotes/notes/automatic-live-migration-completion-post-copy-a7a3a986961c93d8.yaml b/releasenotes/notes/automatic-live-migration-completion-post-copy-a7a3a986961c93d8.yaml index 09ed02c8285d..17ec7aa88d6a 100644 --- a/releasenotes/notes/automatic-live-migration-completion-post-copy-a7a3a986961c93d8.yaml +++ b/releasenotes/notes/automatic-live-migration-completion-post-copy-a7a3a986961c93d8.yaml @@ -3,4 +3,8 @@ features: - New configuration option live_migration_permit_post_copy has been added to start live migrations in a way that allows nova to switch an on-going live migration to post-copy mode. - Requires libvirt>=1.3.3 and QEMU>=2.5.0. + Requires libvirt>=1.3.3 and QEMU>=2.5.0.
 If post-copy is + permitted and version requirements are met, it also changes + the behaviour of 'live_migration_force_complete', so that it + switches an on-going live migration to post-copy mode instead + of pausing the instance during live migration.