Add reshaper for PCPU

Add upgrade code that uses the reshape mechanism to report PCPU instead
of VCPU when the host is configured to use pinned CPUs. With this, when
an existing compute node hosting guests that use dedicated CPUs is
upgraded to the Train release, the allocation records of those guests
are moved from VCPU to PCPU via the reshape functionality.
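
For illustration only, a minimal sketch of what the reshape does to a
single pinned guest's allocation record (the provider UUID and resource
amounts below are invented for the example; the actual swap is performed
by the new _update_provider_tree_for_pcpu helper in the libvirt driver
hunk further down):

    # Before the reshape: the pinned guest still consumes VCPU.
    allocation_before = {
        'allocations': {
            'fake-compute-rp-uuid': {  # hypothetical root provider UUID
                'resources': {'MEMORY_MB': 2048, 'DISK_GB': 20, 'VCPU': 2},
            },
        },
    }

    # After the compute service restarts with '[compute] cpu_dedicated_set'
    # configured, the reshape swaps the CPU consumption in place.
    allocation_after = {
        'allocations': {
            'fake-compute-rp-uuid': {
                'resources': {'MEMORY_MB': 2048, 'DISK_GB': 20, 'PCPU': 2},
            },
        },
    }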

Part of blueprint cpu-resources

Change-Id: I25d70aa09080b22d1bfa0aa097f0a114de8bf15a
Co-Authored-By: Stephen Finucane <sfinucan@redhat.com>
Authored by Bhagyashri Shewale on 2019-01-17 15:15:13 +09:00; committed by Stephen Finucane
parent 35fae09b22
commit 242a894348
4 changed files with 663 additions and 8 deletions


@@ -208,7 +208,9 @@ class _IntegratedTestBase(test.TestCase):
def _build_server(self, flavor_id, image=None):
server = {}
if image is None:
image = self.api.get_images()[0]
# TODO(stephenfin): We need to stop relying on this API
with utils.temporary_mutation(self.api, microversion='2.35'):
image = self.api.get_images()[0]
LOG.debug("Image: %s", image)
# We now have a valid imageId


@@ -112,6 +112,7 @@ class NUMAServersTest(NUMAServersTestBase):
self.assertEqual(expected_usage, compute_usage)
self.assertEqual(end_status, found_server['status'])
self.addCleanup(self._delete_server, created_server_id)
return created_server
@@ -502,6 +503,287 @@ class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest):
super(NUMAServersTest, self).setUp()
class ReshapeForPCPUsTest(NUMAServersTestBase):
api_major_version = 'v2.1'
# TODO(stephenfin): We're using this because we want to be able to force
# the host during scheduling. We should instead look at overriding policy
ADMIN_API = True
def test_vcpu_to_pcpu_reshape(self):
"""Verify that VCPU to PCPU reshape works.
This rather complex test checks that everything is wired up properly
by the reshape operation.
1) create two pinned servers with an old tree where the compute
provider is reporting VCPUs and the servers are consuming the same
2) start a migration of one of these servers to another host but don't
confirm it
3) trigger a reshape
4) check that the allocations of both the servers and the migration
record on the host are updated
5) create another server now against the new tree
"""
# we need to use the 'host' parameter when creating servers
self.api.microversion = '2.74'
# we need to configure the legacy 'vcpu_pin_set' config option, rather
# than the new ones, to ensure the reshape doesn't happen yet
self.flags(cpu_dedicated_set=None, cpu_shared_set=None,
group='compute')
self.flags(vcpu_pin_set='0-7')
host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
cpu_cores=2, cpu_threads=2,
kB_mem=15740000)
# Start services
self.computes = {}
self.compute_rp_uuids = {}
for host in ['test_compute0', 'test_compute1']:
fake_connection = self._get_connection(
host_info=host_info, hostname=host)
# This is fun. Firstly we need to do a global'ish mock so we can
# actually start the service.
with mock.patch('nova.virt.libvirt.host.Host.get_connection',
return_value=fake_connection):
compute = self.start_service('compute', host=host)
# Once that's done, we need to do some tweaks to each individual
# compute "service" to make sure they return unique objects
compute.driver._host.get_connection = lambda: fake_connection
self.computes[host] = compute
# and save the UUIDs for the corresponding resource providers
self.compute_rp_uuids[host] = self.placement_api.get(
'/resource_providers?name=%s' % host).body[
'resource_providers'][0]['uuid']
# ensure there is no PCPU inventory being reported
for host, compute_rp_uuid in self.compute_rp_uuids.items():
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
self.assertEqual(8, compute_inventory['VCPU']['total'])
self.assertNotIn('PCPU', compute_inventory)
# now we boot two servers with pinning, which should boot even without
# PCPUs since we're not doing the translation yet
extra_spec = {'hw:cpu_policy': 'dedicated'}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server_req = self._build_server(flavor_id)
server_req['host'] = 'test_compute0'
server_req['networks'] = 'auto'
created_server1 = self.api.post_server({'server': server_req})
server1 = self._wait_for_state_change(created_server1, 'ACTIVE')
created_server2 = self.api.post_server({'server': server_req})
server2 = self._wait_for_state_change(created_server2, 'ACTIVE')
# sanity check usages
compute_rp_uuid = self.compute_rp_uuids['test_compute0']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(4, compute_usages['VCPU'])
compute_rp_uuid = self.compute_rp_uuids['test_compute1']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(0, compute_usages['VCPU'])
# now initiate the migration process for one of the servers
# FIXME(stephenfin): This is a hack due to the poor behavior of the
# '_wait_for_state_change' implementation here, which doesn't actually
# wait for a transition _to_ a state. I'll be fixing this real soon.
import time
time.sleep(0.5)
with mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
'.migrate_disk_and_power_off', return_value='{}'):
post = {'migrate': None}
self.api.post_server_action(server2['id'], post)
server2 = self._wait_for_state_change(server2, 'VERIFY_RESIZE')
# verify that the inventory, usages and allocation are correct before
# the reshape. Note that the value of 8 VCPUs is derived from
# fakelibvirt.HostInfo with our overridden values
# first, check 'test_compute0', which should have the allocations for
# server1 (the one that hasn't been migrated) and for the migration
# record of server2 (the one that has been migrated)
compute_rp_uuid = self.compute_rp_uuids['test_compute0']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
self.assertEqual(8, compute_inventory['VCPU']['total'])
self.assertNotIn('PCPU', compute_inventory)
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(4, compute_usages['VCPU'])
self.assertNotIn('PCPU', compute_usages)
allocations = self.placement_api.get(
'/allocations/%s' % server1['id']).body['allocations']
# the flavor has disk=10 and ephemeral=10
self.assertEqual(
{'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
allocations[compute_rp_uuid]['resources'])
# then check 'test_compute1', which should have the allocations for
# server2 (the one that has been migrated)
compute_rp_uuid = self.compute_rp_uuids['test_compute1']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
self.assertEqual(8, compute_inventory['VCPU']['total'])
self.assertNotIn('PCPU', compute_inventory)
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(2, compute_usages['VCPU'])
self.assertNotIn('PCPU', compute_usages)
allocations = self.placement_api.get(
'/allocations/%s' % server2['id']).body['allocations']
# the flavor has disk=10 and ephemeral=10
self.assertEqual(
{'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
allocations[compute_rp_uuid]['resources'])
# set the new config options on the compute services and restart them,
# meaning the compute services will now report PCPUs and reshape
# existing inventory to use them
self.flags(cpu_dedicated_set='0-7', group='compute')
self.flags(vcpu_pin_set=None)
for host in ['test_compute0', 'test_compute1']:
self.computes[host].stop()
fake_connection = self._get_connection(
host_info=host_info, hostname=host)
# This is fun. Firstly we need to do a global'ish mock so we can
# actually start the service.
with mock.patch('nova.virt.libvirt.host.Host.get_connection',
return_value=fake_connection):
compute = self.start_service('compute', host=host)
# Once that's done, we need to do some tweaks to each individual
# compute "service" to make sure they return unique objects
compute.driver._host.get_connection = lambda: fake_connection
self.computes[host] = compute
# verify that the inventory, usages and allocation are correct after
# the reshape
# first, check 'test_compute0', which should have the allocations for
# server1 (the one that hasn't been migrated) and for the migration
# record of server2 (the one that has been migrated)
compute_rp_uuid = self.compute_rp_uuids['test_compute0']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
self.assertEqual(8, compute_inventory['PCPU']['total'])
self.assertNotIn('VCPU', compute_inventory)
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(4, compute_usages['PCPU'])
self.assertNotIn('VCPU', compute_usages)
allocations = self.placement_api.get(
'/allocations/%s' % server1['id']).body['allocations']
# the flavor has disk=10 and ephemeral=10
self.assertEqual(
{'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 2},
allocations[compute_rp_uuid]['resources'])
# then check 'test_compute1', which should have the allocations for
# server2 (the one that has been migrated)
compute_rp_uuid = self.compute_rp_uuids['test_compute1']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
self.assertEqual(8, compute_inventory['PCPU']['total'])
self.assertNotIn('VCPU', compute_inventory)
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(2, compute_usages['PCPU'])
self.assertNotIn('VCPU', compute_usages)
allocations = self.placement_api.get(
'/allocations/%s' % server2['id']).body['allocations']
# the flavor has disk=10 and ephemeral=10
self.assertEqual(
{'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 2},
allocations[compute_rp_uuid]['resources'])
# now create one more instance with pinned instances against the
# reshaped tree which should result in PCPU allocations
created_server = self.api.post_server({'server': server_req})
server3 = self._wait_for_state_change(created_server, 'ACTIVE')
compute_rp_uuid = self.compute_rp_uuids['test_compute0']
compute_inventory = self.placement_api.get(
'/resource_providers/%s/inventories' % compute_rp_uuid).body[
'inventories']
self.assertEqual(8, compute_inventory['PCPU']['total'])
self.assertNotIn('VCPU', compute_inventory)
compute_usages = self.placement_api.get(
'/resource_providers/%s/usages' % compute_rp_uuid).body[
'usages']
self.assertEqual(6, compute_usages['PCPU'])
self.assertNotIn('VCPU', compute_usages)
# check the allocations for this server specifically
allocations = self.placement_api.get(
'/allocations/%s' % server3['id']).body[
'allocations']
self.assertEqual(
{'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 2},
allocations[compute_rp_uuid]['resources'])
self._delete_server(server1['id'])
self._delete_server(server2['id'])
self._delete_server(server3['id'])
class NUMAServersWithNetworksTest(NUMAServersTestBase):
def setUp(self):


@@ -28,6 +28,7 @@ import random
import re
import shutil
import signal
import testtools
import threading
import time
import unittest
@@ -900,6 +901,7 @@ def _create_test_instance():
'ephemeral_key_uuid': None,
'vcpu_model': None,
'host': 'fake-host',
'node': 'fake-node',
'task_state': None,
'vm_state': None,
'trusted_certs': None,
@@ -19726,6 +19728,9 @@ class TestUpdateProviderTree(test.NoDBTestCase):
new=mock.Mock(return_value=range(pcpus)))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
new=mock.Mock(return_value=range(vcpus)))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
'_update_provider_tree_for_pcpu',
new=mock.Mock())
def _test_update_provider_tree(
self, mock_gpu_invs, gpu_invs=None, vpmems=None):
if gpu_invs:
@@ -19923,6 +19928,9 @@ class TestUpdateProviderTree(test.NoDBTestCase):
new=mock.Mock(return_value=range(pcpus)))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
new=mock.Mock(return_value=range(vcpus)))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
'_update_provider_tree_for_pcpu',
new=mock.Mock())
def test_update_provider_tree_for_vgpu_reshape(
self, mock_gpus, mock_get_devs, mock_get_mdev_info):
"""Tests the VGPU reshape scenario."""
@@ -19978,24 +19986,25 @@ class TestUpdateProviderTree(test.NoDBTestCase):
allocations = {
uuids.consumer1: {
'allocations': {
# This consumer has ram and vgpu allocations on the root
# node provider and should be changed.
# This consumer has vGPU allocations on the root
# node provider and *should* be changed.
self.cn_rp['uuid']: {
'resources': {
orc.MEMORY_MB: 512,
orc.VGPU: 1
orc.VCPU: 2,
orc.VGPU: 1,
}
}
}
},
uuids.consumer2: {
'allocations': {
# This consumer has ram and vcpu allocations on the root
# node provider and should not be changed.
# This consumer has no vGPU allocations on the root
# node provider and *should not* be changed.
self.cn_rp['uuid']: {
'resources': {
orc.MEMORY_MB: 256,
orc.VCPU: 2
orc.VCPU: 2,
}
}
}
@@ -20031,7 +20040,7 @@ class TestUpdateProviderTree(test.NoDBTestCase):
# provider.
consumer1_allocs = allocations[uuids.consumer1]['allocations']
self.assertEqual(2, len(consumer1_allocs))
self.assertEqual({orc.MEMORY_MB: 512},
self.assertEqual({orc.MEMORY_MB: 512, orc.VCPU: 2},
consumer1_allocs[self.cn_rp['uuid']]['resources'])
# Make sure the VGPU allocation moved to the corresponding child RP
self.assertEqual(
@@ -20112,6 +20121,220 @@ class TestUpdateProviderTree(test.NoDBTestCase):
self.assertIn('Unexpected VGPU resource allocation on provider %s'
% uuids.other_rp, six.text_type(ex))
@mock.patch('nova.objects.instance.Instance.get_by_uuid')
@mock.patch('nova.objects.migration.MigrationList'
'.get_in_progress_by_host_and_node')
@mock.patch('nova.objects.instance.InstanceList.get_by_host')
@mock.patch('nova.objects.compute_node.ComputeNode.get_by_nodename')
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
'_update_provider_tree_for_vgpu',
new=mock.Mock())
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_cpu_traits',
new=mock.Mock(return_value=cpu_traits))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
new=mock.Mock(return_value={'total': disk_gb}))
@mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
new=mock.Mock(return_value=memory_mb))
@mock.patch('nova.virt.libvirt.host.Host.get_online_cpus',
new=mock.Mock(return_value=range(pcpus + vcpus)))
def test_update_provider_tree_for_pcpu_reshape(self,
mock_get_cn, mock_get_instances, mock_get_migrations,
mock_get_instance):
"""Tests the CPU reshape scenario."""
# configure the 'cpu_dedicated_set' and 'cpu_shared_set' fields to
# something useful. Note that because we're setting this, we haven't
# mocked out '_get_pcpu_available' and '_get_vcpu_available' but
# instead have mocked out 'get_online_cpus'. This isn't very "unit
# testy" but it's a more realistic test
self.flags(cpu_shared_set='0-11,18-29',
cpu_dedicated_set='12-17,30-35',
group='compute')
# define a host topology with a single NUMA cell exposing both shared
# and dedicated CPUs
numa_topology = objects.NUMATopology(cells=[
objects.NUMACell(
id=0,
cpuset=set(range(0, 12)) | set(range(18, 30)),
pcpuset=set(range(12, 18)) | set(range(30, 36)),
memory=8192,
cpu_usage=6,
memory_usage=0,
mempages=[],
siblings=[], # no hyperthreading
pinned_cpus=set([2, 3]))])
cn = objects.ComputeNode(
uuid=self.cn_rp['uuid'],
host='host1',
hypervisor_hostname=self.cn_rp['name'],
numa_topology=numa_topology._to_json(),
)
# define three instances, one with a NUMA topology but no pinning, one
# with pinning (and an implicit NUMA topology), and one with neither
# pinning nor a NUMA topology. In practice, this shouldn't happen since
# they should all be on separate hosts, but since we don't enforce that
# we can't rely on it
base_instance = _create_test_instance()
base_instance.pop('id')
base_instance.pop('uuid')
instance_a = objects.Instance(
id=1, uuid=uuids.instance_a, **base_instance)
instance_a.numa_topology = objects.InstanceNUMATopology(cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set([0, 1]),
memory=1024)])
instance_b = objects.Instance(
id=2, uuid=uuids.instance_b, **base_instance)
instance_b.numa_topology = objects.InstanceNUMATopology(cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set([0, 1]),
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
cpu_pinning={0: 2, 1: 3},
memory=1024)])
instance_c = objects.Instance(
id=3, uuid=uuids.instance_c, **base_instance)
instance_c.numa_topology = None
instances = objects.InstanceList(objects=[
instance_a, instance_b, instance_c])
instance_d = objects.Instance(
id=4, uuid=uuids.instance_d, **base_instance)
instance_d.numa_topology = objects.InstanceNUMATopology(cells=[
objects.InstanceNUMACell(
id=0,
cpuset=set([0, 1]),
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
cpu_pinning={0: 0, 1: 1},
memory=1024)])
migration = objects.Migration(
id=42,
uuid=uuids.migration,
source_compute=cn.host,
dest_compute='host2',
instance_uuid=instance_d.uuid)
migrations = objects.MigrationList(objects=[migration])
# use the ComputeNode and InstanceList objects in our mocks
mock_get_cn.return_value = cn
mock_get_instances.return_value = instances
# ditto for the migration and corresponding instance (which is
# theoretically on another host now, but still has an allocation here)
mock_get_instance.return_value = instance_d
mock_get_migrations.return_value = migrations
# mock the inventory of a pre-Train compute node, where PCPUs have not
# yet been reported
initial_inventory = self._get_inventory()
expected_inventory = copy.copy(initial_inventory)
initial_inventory.pop(orc.PCPU)
self.pt.update_inventory(cn.uuid, initial_inventory)
# call 'update_provider_tree' to ensure it raises 'ReshapeNeeded'
# since there is a reshape needed and no allocations provided
with testtools.ExpectedException(exception.ReshapeNeeded):
self.driver.update_provider_tree(
self.pt, cn.hypervisor_hostname)
# now prepare the allocations, which are all using VCPUs since we
# haven't reshaped anything yet
allocations = {
uuids.instance_a: {
'allocations': {
cn.uuid: {
'resources': {
orc.MEMORY_MB: 1000,
orc.VCPU: 2,
orc.DISK_GB: 30,
}
}
}
},
uuids.instance_b: {
'allocations': {
cn.uuid: {
'resources': {
orc.MEMORY_MB: 1000,
orc.VCPU: 2,
orc.DISK_GB: 30,
}
}
}
},
uuids.instance_c: {
'allocations': {
cn.uuid: {
'resources': {
orc.MEMORY_MB: 1000,
orc.VCPU: 2,
orc.DISK_GB: 30,
}
}
}
},
uuids.migration: {
'allocations': {
cn.uuid: {
'resources': {
orc.MEMORY_MB: 1000,
orc.VCPU: 2,
orc.DISK_GB: 30,
}
}
}
},
}
# post reshape, only the allocations for instance_b and the migration
# should change since those are the only instances (by way of
# instance_d in the case of the migration) with CPU pinning
expected_allocations = copy.deepcopy(allocations)
expected_allocations[uuids.instance_b]['allocations'][cn.uuid] = {
'resources': {
orc.MEMORY_MB: 1000,
orc.PCPU: 2,
orc.DISK_GB: 30,
}
}
expected_allocations[uuids.migration]['allocations'][cn.uuid] = {
'resources': {
orc.MEMORY_MB: 1000,
orc.PCPU: 2,
orc.DISK_GB: 30,
}
}
# initiate the reshape
self.driver.update_provider_tree(
self.pt, cn.hypervisor_hostname, allocations=allocations)
# check both the VCPU and PCPU inventory are now reported and the
# allocations have been updated
self.assertEqual(expected_inventory, self.pt.data(cn.uuid).inventory)
self.assertEqual(expected_allocations, allocations)
class TraitsComparisonMixin(object):


@@ -7301,6 +7301,9 @@ class LibvirtDriver(driver.ComputeDriver):
self._update_provider_tree_for_vgpu(
provider_tree, nodename, allocations=allocations)
self._update_provider_tree_for_pcpu(
provider_tree, nodename, allocations=allocations)
self._update_provider_tree_for_vpmems(
provider_tree, nodename, result, resources)
@@ -7709,6 +7712,151 @@ class LibvirtDriver(driver.ComputeDriver):
del root_node.inventory[orc.VGPU]
provider_tree.update_inventory(nodename, root_node.inventory)
def _update_provider_tree_for_pcpu(self, provider_tree, nodename,
allocations=None):
"""Updates the provider tree for PCPU inventory.
Before Train, pinned instances consumed VCPU inventory just like
unpinned instances. Starting in Train, these instances now consume PCPU
inventory. This function reshapes the inventory and existing allocations
where necessary, moving them from VCPU to PCPU.
:param provider_tree: The ProviderTree to update.
:param nodename: The ComputeNode.hypervisor_hostname, also known as
the name of the root node provider in the tree for this host.
:param allocations: A dict, keyed by consumer UUID, of allocation
records, or None::
{
$CONSUMER_UUID: {
"allocations": {
$RP_UUID: {
"generation": $RP_GEN,
"resources": {
$RESOURCE_CLASS: $AMOUNT,
...
},
},
...
},
"project_id": $PROJ_ID,
"user_id": $USER_ID,
"consumer_generation": $CONSUMER_GEN,
},
...
}
If provided, this indicates a reshape was requested and should be
performed.
:raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
the method determines a reshape of the tree is needed, i.e. VCPU
inventory and allocations must be migrated to PCPU resources.
:raises: nova.exception.ReshapeFailed if the requested tree reshape
fails for whatever reason.
"""
# If we're not configuring PCPUs, then we've nothing to worry about
# (yet)
if not CONF.compute.cpu_dedicated_set:
return
root_node = provider_tree.data(nodename)
# Similarly, if PCPU inventories are already reported then there is no
# need to reshape
if orc.PCPU in root_node.inventory:
return
ctx = nova_context.get_admin_context()
compute_node = objects.ComputeNode.get_by_nodename(ctx, nodename)
# Finally, if the compute node doesn't appear to support NUMA, move
# swiftly on
if not compute_node.numa_topology:
return
# The ComputeNode.numa_topology is a StringField, deserialize
numa = objects.NUMATopology.obj_from_db_obj(compute_node.numa_topology)
# If the host doesn't know of any pinned CPUs, we can continue
if not any(cell.pinned_cpus for cell in numa.cells):
return
# At this point, we know there's something to be migrated here but not
# how much. If the allocations are None, we're at the startup of the
# compute node and a Reshape is needed. Indicate this by raising the
# ReshapeNeeded exception
if allocations is None:
LOG.info(
'Requesting provider tree reshape in order to move '
'VCPU allocations to PCPU allocations on compute node '
'provider %s', nodename)
raise exception.ReshapeNeeded()
# Go figure out how many VCPUs to migrate to PCPUs. We've been telling
# people for years *not* to mix pinned and unpinned instances, meaning
# we should be able to move all VCPUs to PCPUs, but we never actually
# enforced this in code and there's an all-too-high chance someone
# didn't get the memo
allocations_needing_reshape = []
# we need to tackle the allocations against instances on this host...
instances = objects.InstanceList.get_by_host(
ctx, compute_node.host, expected_attrs=['numa_topology'])
for instance in instances:
if not instance.numa_topology:
continue
if not instance.numa_topology.cpu_pinning_requested:
continue
allocations_needing_reshape.append(instance.uuid)
# ...and those for any migrations
migrations = objects.MigrationList.get_in_progress_by_host_and_node(
ctx, compute_node.host, compute_node.hypervisor_hostname)
for migration in migrations:
# we don't care about migrations that have landed here, since we
# already have those instances above
if not migration.dest_compute or (
migration.dest_compute == compute_node.host):
continue
instance = objects.Instance.get_by_uuid(
ctx, migration.instance_uuid, expected_attrs=['numa_topology'])
if not instance.numa_topology:
continue
if not instance.numa_topology.cpu_pinning_requested:
continue
allocations_needing_reshape.append(migration.uuid)
for allocation_uuid in allocations_needing_reshape:
consumer_allocations = allocations.get(allocation_uuid, {}).get(
'allocations', {})
# TODO(stephenfin): We can probably just check the allocations for
# ComputeNode.uuid since compute nodes are the only (?) provider of
# VCPU and PCPU resources
for rp_uuid in consumer_allocations:
resources = consumer_allocations[rp_uuid]['resources']
if orc.PCPU in resources or orc.VCPU not in resources:
# Either this has been migrated or it's not a compute node
continue
# Switch stuff around. We can do a straight swap since an
# instance is either pinned or unpinned. By doing this, we're
# modifying the provided 'allocations' dict, which will
# eventually be used by the resource tracker to update
# placement
resources[orc.PCPU] = resources[orc.VCPU]
del resources[orc.VCPU]
def get_available_resource(self, nodename):
"""Retrieve resource information.