Ensure instance can migrate when launched concurrently

When we fixed the problem in If7da79356174be57481ef246618221e3b2ff8200
we forgot to modify a specific check in select_destinations().

_schedule() was returning the correct number of needed hosts, but we
were still verifying against the wrong number of instances, so the
conditional in select_destinations() was always incorrect.

Note that we needed to modify test.py and fake driver because:

 - _do_check_can_live_migrate_destination was using CONF.host
 - check_can_live_migrate_destination in the fake driver was
   incorrectly trying to set a None value on the object, whereas
   the libvirt driver had fixed that earlier (block_migration=None
   when the user specifies block_migration='auto' in the API)
 - pre_live_migration in the fake driver was not returning
   migrate_data, which is passed through live_migration and
   _post_live_migration to set the migration object status

Change-Id: Iff839f3478ebe77bf3e2c4becbe9b9b62fff5035
Closes-Bug: #1718455
This commit is contained in:
Sylvain Bauza 2017-09-21 10:52:46 +02:00 committed by Matt Riedemann
parent ae4b5d0147
commit 87ca0d8af0
6 changed files with 60 additions and 17 deletions

View File

@ -78,7 +78,12 @@ class FilterScheduler(driver.Scheduler):
context, 'scheduler.select_destinations.start',
dict(request_spec=spec_obj.to_legacy_request_spec_dict()))
num_instances = spec_obj.num_instances
# NOTE(sbauza): The RequestSpec.num_instances field contains the number
# of instances created when the RequestSpec was used to first boot some
# instances. This is incorrect when doing a move or resize operation,
# so prefer the length of instance_uuids unless it is None.
num_instances = (len(instance_uuids) if instance_uuids
else spec_obj.num_instances)
selected_hosts = self._schedule(context, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, provider_summaries)

View File

@ -405,7 +405,9 @@ class TestCase(testtools.TestCase):
cell_mapping=cell)
hm.create()
self.host_mappings[hm.host] = hm
if host is not None:
# Make sure that CONF.host is relevant to the right hostname
self.useFixture(nova_fixtures.ConfPatcher(host=host))
svc = self.useFixture(
nova_fixtures.ServiceFixture(name, host, **kwargs))

View File

@ -36,7 +36,7 @@ class TestLiveMigrateOneOfConcurrentlyCreatedInstances(
which is the number of instances created concurrently.
That test will create 2 concurrent instances and verify that when
live-migrating one of them, we end up with a NoValidHost exception.
live-migrating one of them, we end up with a correct move operation.
"""
microversion = 'latest'
@ -109,7 +109,8 @@ class TestLiveMigrateOneOfConcurrentlyCreatedInstances(
return migration
time.sleep(0.5)
self.fail('Timed out waiting for migration with status "%s" for '
'instance: %s' % (expected_status, server['id']))
'instance: %s. Current instance migrations: %s' %
(expected_status, server['id'], migrations))
def test_live_migrate_one_multi_created_instance(self):
# Boot two servers in a multi-create request
@ -131,13 +132,13 @@ class TestLiveMigrateOneOfConcurrentlyCreatedInstances(
# we need to lookup the migrations API.
self.api.post_server_action(server['id'], post)
# Poll the migration until it fails
migration = self._wait_for_migration_status(server, 'error')
# Poll the migration until it is done.
migration = self._wait_for_migration_status(server, 'completed')
self.assertEqual('live-migration', migration['migration_type'])
self.assertEqual(original_host, migration['source_compute'])
# Verify that the migration failed as the instance is still on the
# source node.
# Verify that the migration succeeded as the instance is now on the
# destination node.
server = self.api.get_server(server['id'])
self.assertEqual(original_host, server['OS-EXT-SRV-ATTR:host'])
self.assertEqual(target_host, server['OS-EXT-SRV-ATTR:host'])

View File

@ -527,7 +527,7 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
def test_select_destinations_match_num_instances(self, mock_schedule):
"""Tests that the select_destinations() method returns the list of
hosts from the _schedule() method when the number of returned hosts
equals the num_instances on the spec object
equals the number of instance UUIDs passed in.
"""
spec_obj = objects.RequestSpec(
flavor=objects.Flavor(memory_mb=512,
@ -541,11 +541,41 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
mock_schedule.return_value = [mock.sentinel.hs1]
dests = self.driver.select_destinations(self.context, spec_obj,
mock.sentinel.instance_uuids, mock.sentinel.alloc_reqs_by_rp_uuid,
[mock.sentinel.instance_uuid], mock.sentinel.alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums)
mock_schedule.assert_called_once_with(self.context, spec_obj,
mock.sentinel.instance_uuids, mock.sentinel.alloc_reqs_by_rp_uuid,
[mock.sentinel.instance_uuid], mock.sentinel.alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums)
self.assertEqual([mock.sentinel.hs1], dests)
@mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
            '_schedule')
def test_select_destinations_for_move_ops(self, mock_schedule):
    """Verify the host-count check during a move operation.

    select_destinations() must compare the number of hosts returned by
    _schedule() with the number of instance UUIDs it was given, not with
    RequestSpec.num_instances: for a move/resize that field still holds
    the original boot count and can be wrong.
    """
    # num_instances=2 deliberately disagrees with the single UUID we
    # pass below; the call must still succeed.
    request_spec = objects.RequestSpec(
        flavor=objects.Flavor(memory_mb=512,
                              root_gb=512,
                              ephemeral_gb=0,
                              swap=0,
                              vcpus=1),
        project_id=uuids.project_id,
        num_instances=2)
    single_uuid = [mock.sentinel.instance_uuid]
    mock_schedule.return_value = [mock.sentinel.hs1]

    destinations = self.driver.select_destinations(
        self.context, request_spec, single_uuid,
        mock.sentinel.alloc_reqs_by_rp_uuid, mock.sentinel.p_sums)

    mock_schedule.assert_called_once_with(
        self.context, request_spec, single_uuid,
        mock.sentinel.alloc_reqs_by_rp_uuid, mock.sentinel.p_sums)
    self.assertEqual([mock.sentinel.hs1], destinations)
@ -571,8 +601,8 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.assertRaises(exception.NoValidHost,
self.driver.select_destinations, self.context, spec_obj,
mock.sentinel.instance_uuids, mock.sentinel.alloc_reqs_by_rp_uuid,
mock.sentinel.p_sums)
[mock.sentinel.instance_uuid1, mock.sentinel.instance_uuid2],
mock.sentinel.alloc_reqs_by_rp_uuid, mock.sentinel.p_sums)
# Verify that the host state object has been marked as not updated so
# it's picked up in the next pull from the DB for compute node objects

View File

@ -816,6 +816,7 @@ class ComputeDriver(object):
:param network_info: instance network information
:param disk_info: instance disk information
:param migrate_data: a LiveMigrateData object
:returns: migrate_data modified by the driver
"""
raise NotImplementedError()

View File

@ -510,8 +510,12 @@ class FakeDriver(driver.ComputeDriver):
data.graphics_listen_addr_vnc = CONF.vnc.server_listen
data.graphics_listen_addr_spice = CONF.spice.server_listen
data.serial_listen_addr = None
data.block_migration = block_migration
data.disk_over_commit = disk_over_commit or False # called with None
# Notes(eliqiao): block_migration and disk_over_commit are not
# nullable, so just don't set them if they are None
if block_migration is not None:
data.block_migration = block_migration
if disk_over_commit is not None:
data.disk_over_commit = disk_over_commit
data.disk_available_mb = 100000
data.is_shared_block_storage = True
data.is_shared_instance_path = True
@ -532,7 +536,7 @@ class FakeDriver(driver.ComputeDriver):
def pre_live_migration(self, context, instance, block_device_info,
network_info, disk_info, migrate_data):
return
return migrate_data
def unfilter_instance(self, instance, network_info):
return