Recalculate 'RequestSpec.numa_topology' on resize

When resizing, it's possible to change the NUMA topology of an instance, or remove it entirely, due to different extra specs in the new flavor. Unfortunately we cache the instance's NUMA topology object in 'RequestSpec.numa_topology' and don't update it when resizing. This means if a given host doesn't have enough free CPUs or mempages of the size requested by the *old* flavor, that host can be rejected by the filter. Correct this by regenerating the 'RequestSpec.numa_topology' field as part of the resize operation, ensuring that we revert to the old field value in the case of a resize-revert. Change-Id: I0ca50665b86b9fdb4618192d4d6a3bcaa6ea2291 Signed-off-by: Stephen Finucane <sfinucan@redhat.com> Co-Authored-By: He Jie Xu <hejie.xu@intel.com> Closes-bug: #1805767
2019-06-07 16:57:31 +01:00 · 2019-06-07 16:57:31 +01:00 · c29f382f69
parent c16315165c
commit c29f382f69
4 changed files with 208 additions and 21 deletions
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@ -3625,15 +3625,22 @@ class API(base.Base):
            availability_zones.get_host_availability_zone(
                context, migration.source_compute))

-        # Conductor updated the RequestSpec.flavor during the initial resize
-        # operation to point at the new flavor, so we need to update the
-        # RequestSpec to point back at the original flavor, otherwise
-        # subsequent move operations through the scheduler will be using the
-        # wrong flavor.
+        # If this was a resize, the conductor may have updated the
+        # RequestSpec.flavor field (to point at the new flavor) and the
+        # RequestSpec.numa_topology field (to reflect the new flavor's extra
+        # specs) during the initial resize operation, so we need to update the
+        # RequestSpec to point back at the original flavor and reflect the NUMA
+        # settings of this flavor, otherwise subsequent move operations through
+        # the scheduler will be using the wrong values. There's no need to do
+        # this if the flavor hasn't changed though and we're migrating rather
+        # than resizing.
        reqspec = objects.RequestSpec.get_by_instance_uuid(
            context, instance.uuid)
-        reqspec.flavor = instance.old_flavor
-        reqspec.save()
+        if reqspec.flavor['id'] != instance.old_flavor['id']:
+            reqspec.flavor = instance.old_flavor
+            reqspec.numa_topology = hardware.numa_get_constraints(
+                instance.old_flavor, instance.image_meta)
+            reqspec.save()

        # NOTE(gibi): This is a performance optimization. If the network info
        # cache does not have ports with allocations in the binding profile
@ -3890,6 +3897,11 @@ class API(base.Base):
            context, instance.uuid)
        request_spec.ignore_hosts = filter_properties['ignore_hosts']

+        # don't recalculate the NUMA topology unless the flavor has changed
+        if not same_instance_type:
+            request_spec.numa_topology = hardware.numa_get_constraints(
+                new_instance_type, instance.image_meta)
+
        instance.task_state = task_states.RESIZE_PREP
        instance.progress = 0
        instance.auto_disk_config = auto_disk_config or False
--- a/nova/tests/functional/libvirt/test_numa_servers.py
+++ b/nova/tests/functional/libvirt/test_numa_servers.py
@ -39,6 +39,8 @@ class NUMAServersTestBase(base.ServersTestBase):
    def setUp(self):
        super(NUMAServersTestBase, self).setUp()

+        self.ctxt = nova_context.get_admin_context()
+
        # Mock the 'NUMATopologyFilter' filter, as most tests need to inspect
        # this
        host_manager = self.scheduler.manager.driver.host_manager
@ -166,8 +168,7 @@ class NUMAServersTest(NUMAServersTestBase):

        server = self._run_build_test(flavor_id, expected_usage=expected_usage)

-        ctx = nova_context.get_admin_context()
-        inst = objects.Instance.get_by_uuid(ctx, server['id'])
+        inst = objects.Instance.get_by_uuid(self.ctxt, server['id'])
        self.assertEqual(1, len(inst.numa_topology.cells))
        self.assertEqual(5, inst.numa_topology.cells[0].cpu_topology.cores)

@ -345,6 +346,111 @@ class NUMAServersTest(NUMAServersTestBase):
            self.api.post_server, post)
        self.assertEqual(403, ex.response.status_code)

+    def _inspect_filter_numa_topology(self, cell_count):
+        """Helper function used by test_resize_server_with_numa* tests."""
+        args, kwargs = self.mock_filter.call_args_list[0]
+        self.assertEqual(2, len(args))
+        self.assertEqual({}, kwargs)
+        numa_topology = args[1].numa_topology
+        self.assertEqual(cell_count, len(numa_topology.cells), args)
+
+        # We always reset mock_filter because we don't want these result
+        # fudging later tests
+        self.mock_filter.reset_mock()
+        self.assertEqual(0, len(self.mock_filter.call_args_list))
+
+    def _inspect_request_spec(self, server, cell_count):
+        """Helper function used by test_resize_server_with_numa* tests."""
+        req_spec = objects.RequestSpec.get_by_instance_uuid(
+            self.ctxt, server['id'])
+        self.assertEqual(cell_count, len(req_spec.numa_topology.cells))
+
+    def test_resize_revert_server_with_numa(self):
+        """Create a single-node instance and resize it to a flavor with two
+        nodes, then revert to the old flavor rather than confirm.
+
+        Nothing too complicated going on here. We create an instance with a one
+        NUMA node guest topology and then attempt to resize this to use a
+        topology with two nodes. Once done, we revert this resize to ensure the
+        instance reverts to using the old NUMA topology as expected.
+        """
+        # don't bother waiting for neutron events since we don't actually have
+        # neutron
+        self.flags(vif_plugging_timeout=0)
+
+        host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
+                                         cpu_cores=2, cpu_threads=2,
+                                         kB_mem=15740000)
+
+        # Start services
+        self.computes = {}
+        for host in ['test_compute0', 'test_compute1']:
+            fake_connection = self._get_connection(
+                host_info=host_info, hostname=host)
+
+            # This is fun. Firstly we need to do a global'ish mock so we can
+            # actually start the service.
+            with mock.patch('nova.virt.libvirt.host.Host.get_connection',
+                            return_value=fake_connection):
+                compute = self.start_service('compute', host=host)
+
+            # Once that's done, we need to do some tweaks to each individual
+            # compute "service" to make sure they return unique objects
+            compute.driver._host.get_connection = lambda: fake_connection
+            self.computes[host] = compute
+
+        # STEP ONE
+
+        # Create server
+        extra_spec = {'hw:numa_nodes': '1'}
+        flavor_a_id = self._create_flavor(vcpu=4, extra_spec=extra_spec)
+
+        server = self._create_server(flavor_id=flavor_a_id)
+
+        # Ensure the filter saw the 'numa_topology' field and the request spec
+        # is as expected
+        self._inspect_filter_numa_topology(cell_count=1)
+        self._inspect_request_spec(server, cell_count=1)
+
+        # STEP TWO
+
+        # Create a new flavor with a different but still valid number of NUMA
+        # nodes
+        extra_spec = {'hw:numa_nodes': '2'}
+        flavor_b_id = self._create_flavor(vcpu=4, extra_spec=extra_spec)
+
+        # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should
+        # probably be less...dumb
+        with mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
+                        '.migrate_disk_and_power_off', return_value='{}'):
+            post = {'resize': {'flavorRef': flavor_b_id}}
+            self.api.post_server_action(server['id'], post)
+
+        server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
+
+        # Ensure the filter saw 'hw:numa_nodes=2' from flavor_b and the request
+        # spec has been updated
+        self._inspect_filter_numa_topology(cell_count=2)
+        self._inspect_request_spec(server, cell_count=2)
+
+        # STEP THREE
+
+        # Revert the instance rather than confirming it, and ensure we see the
+        # old NUMA topology
+
+        # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should
+        # probably be less...dumb
+        with mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
+                        '.migrate_disk_and_power_off', return_value='{}'):
+            post = {'revertResize': {}}
+            self.api.post_server_action(server['id'], post)
+
+        server = self._wait_for_state_change(server, 'ACTIVE')
+
+        # We don't have a filter call to check, but we can check that the
+        # request spec changes were reverted
+        self._inspect_request_spec(server, cell_count=1)
+
    def test_resize_vcpu_to_pcpu(self):
        """Create an unpinned instance and resize it to a flavor with pinning.

--- a/nova/tests/unit/compute/test_compute_api.py
+++ b/nova/tests/unit/compute/test_compute_api.py
@ -118,8 +118,14 @@ class _ComputeAPIUnitTestMixIn(object):
                 }
        if updates:
            flavor.update(updates)
+
+        expected_attrs = None
+        if 'extra_specs' in updates and updates['extra_specs']:
+            expected_attrs = ['extra_specs']
+
        return objects.Flavor._from_db_object(
-            self.context, objects.Flavor(extra_specs={}), flavor)
+            self.context, objects.Flavor(extra_specs={}), flavor,
+            expected_attrs=expected_attrs)

    def _create_instance_obj(self, params=None, flavor=None):
        """Create a test instance."""
@ -162,6 +168,7 @@ class _ComputeAPIUnitTestMixIn(object):
        instance.info_cache = objects.InstanceInfoCache()
        instance.flavor = flavor
        instance.old_flavor = instance.new_flavor = None
+        instance.numa_topology = None

        if params:
            instance.update(params)
@ -1639,8 +1646,9 @@ class _ComputeAPIUnitTestMixIn(object):
    def test_confirm_resize_with_migration_ref(self):
        self._test_confirm_resize(mig_ref_passed=True)

+    @mock.patch('nova.virt.hardware.numa_get_constraints')
    @mock.patch('nova.network.neutron.API.get_requested_resource_for_instance',
-                return_value=mock.sentinel.res_req)
+                return_value=[])
    @mock.patch('nova.availability_zones.get_host_availability_zone',
                return_value='nova')
    @mock.patch('nova.objects.Quotas.check_deltas')
@ -1649,33 +1657,60 @@ class _ComputeAPIUnitTestMixIn(object):
    @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid')
    def _test_revert_resize(
            self, mock_get_reqspec, mock_elevated, mock_get_migration,
-            mock_check, mock_get_host_az, mock_get_requested_resources):
+            mock_check, mock_get_host_az, mock_get_requested_resources,
+            mock_get_numa, same_flavor):
        params = dict(vm_state=vm_states.RESIZED)
        fake_inst = self._create_instance_obj(params=params)
        fake_inst.info_cache.network_info = model.NetworkInfo([
            model.VIF(id=uuids.port1, profile={'allocation': uuids.rp})])
-        fake_inst.old_flavor = fake_inst.flavor
        fake_mig = objects.Migration._from_db_object(
                self.context, objects.Migration(),
                test_migration.fake_db_migration())
+        fake_reqspec = objects.RequestSpec()
+        fake_reqspec.flavor = fake_inst.flavor
+        fake_numa_topology = objects.InstanceNUMATopology(cells=[
+            objects.InstanceNUMACell(
+                id=0, cpuset=set([0]), memory=512, pagesize=None,
+                cpu_pinning_raw=None, cpuset_reserved=None, cpu_policy=None,
+                cpu_thread_policy=None)])
+
+        if same_flavor:
+            fake_inst.old_flavor = fake_inst.flavor
+        else:
+            fake_inst.old_flavor = self._create_flavor(
+                id=200, flavorid='new-flavor-id', name='new_flavor',
+                disabled=False, extra_specs={'hw:numa_nodes': '1'})

        mock_elevated.return_value = self.context
        mock_get_migration.return_value = fake_mig
+        mock_get_reqspec.return_value = fake_reqspec
+        mock_get_numa.return_value = fake_numa_topology
+
+        def _check_reqspec():
+            if same_flavor:
+                assert_func = self.assertNotEqual
+            else:
+                assert_func = self.assertEqual
+
+            assert_func(fake_numa_topology, fake_reqspec.numa_topology)
+            assert_func(fake_inst.old_flavor, fake_reqspec.flavor)

        def _check_state(expected_task_state=None):
            self.assertEqual(task_states.RESIZE_REVERTING,
                             fake_inst.task_state)

-        def _check_mig(expected_task_state=None):
+        def _check_mig():
            self.assertEqual('reverting', fake_mig.status)

        with test.nested(
+            mock.patch.object(fake_reqspec, 'save',
+                              side_effect=_check_reqspec),
            mock.patch.object(fake_inst, 'save', side_effect=_check_state),
            mock.patch.object(fake_mig, 'save', side_effect=_check_mig),
            mock.patch.object(self.compute_api, '_record_action_start'),
            mock.patch.object(self.compute_api.compute_rpcapi, 'revert_resize')
-        ) as (mock_inst_save, mock_mig_save, mock_record_action,
-              mock_revert_resize):
+        ) as (mock_reqspec_save, mock_inst_save, mock_mig_save,
+              mock_record_action, mock_revert_resize):
            self.compute_api.revert_resize(self.context, fake_inst)

            mock_elevated.assert_called_once_with()
@ -1685,7 +1720,15 @@ class _ComputeAPIUnitTestMixIn(object):
            mock_mig_save.assert_called_once_with()
            mock_get_reqspec.assert_called_once_with(
                self.context, fake_inst.uuid)
-            mock_get_reqspec.return_value.save.assert_called_once_with()
+            if same_flavor:
+                # if we are not changing flavors through the revert, we
+                # shouldn't attempt to rebuild the NUMA topology since it won't
+                # have changed
+                mock_get_numa.assert_not_called()
+            else:
+                # not so if the flavor *has* changed though
+                mock_get_numa.assert_called_once_with(
+                    fake_inst.old_flavor, mock.ANY)
            mock_record_action.assert_called_once_with(self.context, fake_inst,
                                                       'revertResize')
            mock_revert_resize.assert_called_once_with(
@ -1694,11 +1737,17 @@ class _ComputeAPIUnitTestMixIn(object):
            mock_get_requested_resources.assert_called_once_with(
                self.context, fake_inst.uuid)
            self.assertEqual(
-                mock.sentinel.res_req,
+                [],
                mock_get_reqspec.return_value.requested_resources)

    def test_revert_resize(self):
-        self._test_revert_resize()
+        self._test_revert_resize(same_flavor=False)
+
+    def test_revert_resize_same_flavor(self):
+        """Test behavior when reverting a migration or a resize to the same
+        flavor.
+        """
+        self._test_revert_resize(same_flavor=True)

    @mock.patch('nova.network.neutron.API.get_requested_resource_for_instance')
    @mock.patch('nova.availability_zones.get_host_availability_zone',
@ -1741,6 +1790,7 @@ class _ComputeAPIUnitTestMixIn(object):

    @mock.patch('nova.compute.api.API.get_instance_host_status',
                new=mock.Mock(return_value=fields_obj.HostStatus.UP))
+    @mock.patch('nova.virt.hardware.numa_get_constraints')
    @mock.patch('nova.compute.api.API._allow_resize_to_same_host')
    @mock.patch('nova.compute.utils.is_volume_backed_instance',
                return_value=False)
@ -1759,6 +1809,7 @@ class _ComputeAPIUnitTestMixIn(object):
                     mock_inst_save, mock_count, mock_limit, mock_record,
                     mock_migration, mock_validate, mock_is_vol_backed,
                     mock_allow_resize_to_same_host,
+                     mock_get_numa,
                     flavor_id_passed=True,
                     same_host=False, allow_same_host=False,
                     project_id=None,
@ -1777,10 +1828,16 @@ class _ComputeAPIUnitTestMixIn(object):
            # To test instance w/ different project id than context (admin)
            params['project_id'] = project_id
        fake_inst = self._create_instance_obj(params=params)
+        fake_numa_topology = objects.InstanceNUMATopology(cells=[
+            objects.InstanceNUMACell(
+                id=0, cpuset=set([0]), memory=512, pagesize=None,
+                cpu_pinning_raw=None, cpuset_reserved=None, cpu_policy=None,
+                cpu_thread_policy=None)])

        mock_resize = self.useFixture(
            fixtures.MockPatchObject(self.compute_api.compute_task_api,
                                     'resize_instance')).mock
+        mock_get_numa.return_value = fake_numa_topology

        if host_name:
            mock_get_all_by_host.return_value = [objects.ComputeNode(
@ -1788,8 +1845,9 @@ class _ComputeAPIUnitTestMixIn(object):

        current_flavor = fake_inst.get_flavor()
        if flavor_id_passed:
-            new_flavor = self._create_flavor(id=200, flavorid='new-flavor-id',
-                                name='new_flavor', disabled=False)
+            new_flavor = self._create_flavor(
+                id=200, flavorid='new-flavor-id', name='new_flavor',
+                disabled=False, extra_specs={'hw:numa_nodes': '1'})
            if same_flavor:
                new_flavor.id = current_flavor.id
            mock_get_flavor.return_value = new_flavor
@ -1876,6 +1934,11 @@ class _ComputeAPIUnitTestMixIn(object):
                fake_spec.requested_destination.allow_cross_cell_move)
            mock_allow_resize_to_same_host.assert_called_once()

+            if flavor_id_passed and not same_flavor:
+                mock_get_numa.assert_called_once_with(new_flavor, mock.ANY)
+            else:
+                mock_get_numa.assert_not_called()
+
        if host_name:
            mock_get_all_by_host.assert_called_once_with(
                self.context, host_name, True)
--- a/nova/tests/unit/virt/libvirt/fake_imagebackend.py
+++ b/nova/tests/unit/virt/libvirt/fake_imagebackend.py
@ -184,6 +184,12 @@ class ImageBackendFixture(fixtures.Fixture):
        # class.
        image_init.SUPPORTS_CLONE = False

+        # Ditto for the 'is_shared_block_storage' function
+        def is_shared_block_storage():
+            return False
+
+        setattr(image_init, 'is_shared_block_storage', is_shared_block_storage)
+
        return image_init

    def _fake_cache(self, fetch_func, filename, size=None, *args, **kwargs):