Merge "Add reshaper for PCPU"
commit 0ff3fd9bf4
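The change below adds a "reshaper" that moves existing pinned-instance allocations from the VCPU resource class to the new PCPU class. As a rough orientation before the diff, here is a minimal, self-contained sketch of the swap the reshaper performs on each consumer's allocation record; it is simplified from the `_update_provider_tree_for_pcpu` method further down, and the helper name, consumer and provider IDs are illustrative only, not part of the change.

# Sketch only: the same VCPU -> PCPU swap that
# _update_provider_tree_for_pcpu applies to each consumer's allocation
# record (names and UUID strings here are made up for illustration).
def swap_vcpu_for_pcpu(allocations, consumer_uuid):
    for alloc in allocations[consumer_uuid]['allocations'].values():
        resources = alloc['resources']
        if 'PCPU' in resources or 'VCPU' not in resources:
            # already reshaped, or not a provider of CPU resources
            continue
        resources['PCPU'] = resources.pop('VCPU')


allocations = {
    'consumer-uuid': {
        'allocations': {
            'compute-rp-uuid': {
                'resources': {'MEMORY_MB': 2048, 'DISK_GB': 20, 'VCPU': 2},
            },
        },
    },
}
swap_vcpu_for_pcpu(allocations, 'consumer-uuid')
assert allocations['consumer-uuid']['allocations'][
    'compute-rp-uuid']['resources'] == {
        'MEMORY_MB': 2048, 'DISK_GB': 20, 'PCPU': 2}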
@@ -208,7 +208,9 @@ class _IntegratedTestBase(test.TestCase):
    def _build_server(self, flavor_id, image=None):
        server = {}
        if image is None:
            image = self.api.get_images()[0]
            # TODO(stephenfin): We need to stop relying on this API
            with utils.temporary_mutation(self.api, microversion='2.35'):
                image = self.api.get_images()[0]
            LOG.debug("Image: %s", image)

            # We now have a valid imageId

@@ -112,6 +112,7 @@ class NUMAServersTest(NUMAServersTestBase):
        self.assertEqual(expected_usage, compute_usage)

        self.assertEqual(end_status, found_server['status'])

        self.addCleanup(self._delete_server, created_server_id)
        return created_server

@@ -502,6 +503,287 @@ class NUMAServerTestWithCountingQuotaFromPlacement(NUMAServersTest):
        super(NUMAServersTest, self).setUp()


class ReshapeForPCPUsTest(NUMAServersTestBase):

    api_major_version = 'v2.1'

    # TODO(stephenfin): We're using this because we want to be able to force
    # the host during scheduling. We should instead look at overriding policy
    ADMIN_API = True

    def test_vcpu_to_pcpu_reshape(self):
        """Verify that VCPU to PCPU reshape works.

        This rather complex test checks that everything is wired up properly
        by the reshape operation.

        1) create two pinned servers with an old tree where the compute
           provider is reporting VCPUs and the servers are consuming the same
        2) start a migration of one of these servers to another host but don't
           confirm it
        3) trigger a reshape
        4) check that the allocations of both the servers and the migration
           record on the host are updated
        5) create another server now against the new tree
        """

        # we need to use the 'host' parameter when creating servers
        self.api.microversion = '2.74'

        # we need to configure the legacy 'vcpu_pin_set' config option, rather
        # than the new ones, to ensure the reshape doesn't happen yet

        self.flags(cpu_dedicated_set=None, cpu_shared_set=None,
                   group='compute')
        self.flags(vcpu_pin_set='0-7')

        host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
                                         cpu_cores=2, cpu_threads=2,
                                         kB_mem=15740000)

        # Start services
        self.computes = {}
        self.compute_rp_uuids = {}
        for host in ['test_compute0', 'test_compute1']:
            fake_connection = self._get_connection(
                host_info=host_info, hostname=host)

            # This is fun. Firstly we need to do a global'ish mock so we can
            # actually start the service.
            with mock.patch('nova.virt.libvirt.host.Host.get_connection',
                            return_value=fake_connection):
                compute = self.start_service('compute', host=host)

            # Once that's done, we need to do some tweaks to each individual
            # compute "service" to make sure they return unique objects
            compute.driver._host.get_connection = lambda: fake_connection
            self.computes[host] = compute

            # and save the UUIDs for the corresponding resource providers
            self.compute_rp_uuids[host] = self.placement_api.get(
                '/resource_providers?name=%s' % host).body[
                'resource_providers'][0]['uuid']

        # ensure there is no PCPU inventory being reported

        for host, compute_rp_uuid in self.compute_rp_uuids.items():
            compute_inventory = self.placement_api.get(
                '/resource_providers/%s/inventories' % compute_rp_uuid).body[
                'inventories']
            self.assertEqual(8, compute_inventory['VCPU']['total'])
            self.assertNotIn('PCPU', compute_inventory)

        # now we boot two servers with pinning, which should boot even without
        # PCPUs since we're not doing the translation yet

        extra_spec = {'hw:cpu_policy': 'dedicated'}
        flavor_id = self._create_flavor(extra_spec=extra_spec)

        server_req = self._build_server(flavor_id)
        server_req['host'] = 'test_compute0'
        server_req['networks'] = 'auto'

        created_server1 = self.api.post_server({'server': server_req})
        server1 = self._wait_for_state_change(created_server1, 'ACTIVE')

        created_server2 = self.api.post_server({'server': server_req})
        server2 = self._wait_for_state_change(created_server2, 'ACTIVE')

        # sanity check usages

        compute_rp_uuid = self.compute_rp_uuids['test_compute0']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(4, compute_usages['VCPU'])

        compute_rp_uuid = self.compute_rp_uuids['test_compute1']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(0, compute_usages['VCPU'])

        # now initiate the migration process for one of the servers

        # FIXME(stephenfin): This is a hack due to the poor behavior of the
        # '_wait_for_state_change' implementation here, which doesn't actually
        # wait for a transition _to_ a state. I'll be fixing this real soon.
        import time
        time.sleep(0.5)

        with mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
                        '.migrate_disk_and_power_off', return_value='{}'):
            post = {'migrate': None}
            self.api.post_server_action(server2['id'], post)

        server2 = self._wait_for_state_change(server2, 'VERIFY_RESIZE')

        # verify that the inventory, usages and allocation are correct before
        # the reshape. Note that the value of 8 VCPUs is derived from
        # fakelibvirt.HostInfo with our overridden values

        # first, check 'test_compute0', which should have the allocations for
        # server1 (the one that hasn't been migrated) and for the migration
        # record of server2 (the one that has been migrated)

        compute_rp_uuid = self.compute_rp_uuids['test_compute0']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        self.assertEqual(8, compute_inventory['VCPU']['total'])
        self.assertNotIn('PCPU', compute_inventory)
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(4, compute_usages['VCPU'])
        self.assertNotIn('PCPU', compute_usages)

        allocations = self.placement_api.get(
            '/allocations/%s' % server1['id']).body['allocations']
        # the flavor has disk=10 and ephemeral=10
        self.assertEqual(
            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
            allocations[compute_rp_uuid]['resources'])

        # then check 'test_compute1', which should have the allocations for
        # server2 (the one that has been migrated)

        compute_rp_uuid = self.compute_rp_uuids['test_compute1']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        self.assertEqual(8, compute_inventory['VCPU']['total'])
        self.assertNotIn('PCPU', compute_inventory)
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(2, compute_usages['VCPU'])
        self.assertNotIn('PCPU', compute_usages)

        allocations = self.placement_api.get(
            '/allocations/%s' % server2['id']).body['allocations']
        # the flavor has disk=10 and ephemeral=10
        self.assertEqual(
            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
            allocations[compute_rp_uuid]['resources'])

        # set the new config options on the compute services and restart them,
        # meaning the compute services will now report PCPUs and reshape
        # existing inventory to use them

        self.flags(cpu_dedicated_set='0-7', group='compute')
        self.flags(vcpu_pin_set=None)

        for host in ['test_compute0', 'test_compute1']:
            self.computes[host].stop()

            fake_connection = self._get_connection(
                host_info=host_info, hostname=host)

            # This is fun. Firstly we need to do a global'ish mock so we can
            # actually start the service.
            with mock.patch('nova.virt.libvirt.host.Host.get_connection',
                            return_value=fake_connection):
                compute = self.start_service('compute', host=host)

            # Once that's done, we need to do some tweaks to each individual
            # compute "service" to make sure they return unique objects
            compute.driver._host.get_connection = lambda: fake_connection
            self.computes[host] = compute

        # verify that the inventory, usages and allocation are correct after
        # the reshape

        # first, check 'test_compute0', which should have the allocations for
        # server1 (the one that hasn't been migrated) and for the migration
        # record of server2 (the one that has been migrated)

        compute_rp_uuid = self.compute_rp_uuids['test_compute0']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        self.assertEqual(8, compute_inventory['PCPU']['total'])
        self.assertNotIn('VCPU', compute_inventory)
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(4, compute_usages['PCPU'])
        self.assertNotIn('VCPU', compute_usages)

        allocations = self.placement_api.get(
            '/allocations/%s' % server1['id']).body['allocations']
        # the flavor has disk=10 and ephemeral=10
        self.assertEqual(
            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 2},
            allocations[compute_rp_uuid]['resources'])

        # then check 'test_compute1', which should have the allocations for
        # server2 (the one that has been migrated)

        compute_rp_uuid = self.compute_rp_uuids['test_compute1']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        self.assertEqual(8, compute_inventory['PCPU']['total'])
        self.assertNotIn('VCPU', compute_inventory)
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(2, compute_usages['PCPU'])
        self.assertNotIn('VCPU', compute_usages)

        allocations = self.placement_api.get(
            '/allocations/%s' % server2['id']).body['allocations']
        # the flavor has disk=10 and ephemeral=10
        self.assertEqual(
            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 2},
            allocations[compute_rp_uuid]['resources'])

        # now create one more instance with pinned instances against the
        # reshaped tree which should result in PCPU allocations

        created_server = self.api.post_server({'server': server_req})
        server3 = self._wait_for_state_change(created_server, 'ACTIVE')

        compute_rp_uuid = self.compute_rp_uuids['test_compute0']

        compute_inventory = self.placement_api.get(
            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
            'inventories']
        self.assertEqual(8, compute_inventory['PCPU']['total'])
        self.assertNotIn('VCPU', compute_inventory)
        compute_usages = self.placement_api.get(
            '/resource_providers/%s/usages' % compute_rp_uuid).body[
            'usages']
        self.assertEqual(6, compute_usages['PCPU'])
        self.assertNotIn('VCPU', compute_usages)

        # check the allocations for this server specifically

        allocations = self.placement_api.get(
            '/allocations/%s' % server3['id']).body[
            'allocations']
        self.assertEqual(
            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 2},
            allocations[compute_rp_uuid]['resources'])

        self._delete_server(server1['id'])
        self._delete_server(server2['id'])
        self._delete_server(server3['id'])


class NUMAServersWithNetworksTest(NUMAServersTestBase):

    def setUp(self):

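As an aside, the placement queries the functional test issues through its fixture map onto plain placement REST calls, which is how an operator would verify the same before/after state on a real deployment. The sketch below is illustrative only and not part of this change: the endpoint URL and token are placeholders, and the helper name is made up.

# Illustrative sketch, not part of the change: the usage checks performed
# above, expressed as raw placement API calls. PLACEMENT_URL and the token
# value are placeholders for a real deployment's values.
import requests

PLACEMENT_URL = 'http://placement.example.org'  # placeholder
HEADERS = {'X-Auth-Token': 'REPLACE-WITH-A-REAL-TOKEN'}  # placeholder


def provider_usages(rp_uuid):
    resp = requests.get(
        '%s/resource_providers/%s/usages' % (PLACEMENT_URL, rp_uuid),
        headers=HEADERS)
    return resp.json()['usages']

# Before the reshape a host running pinned servers shows VCPU usage;
# afterwards the same consumption shows up under PCPU instead, e.g.
# {'MEMORY_MB': 4096, 'DISK_GB': 40, 'PCPU': 4}.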
@@ -28,6 +28,7 @@ import random
import re
import shutil
import signal
import testtools
import threading
import time
import unittest

@@ -900,6 +901,7 @@ def _create_test_instance():
        'ephemeral_key_uuid': None,
        'vcpu_model': None,
        'host': 'fake-host',
        'node': 'fake-node',
        'task_state': None,
        'vm_state': None,
        'trusted_certs': None,

@@ -19726,6 +19728,9 @@ class TestUpdateProviderTree(test.NoDBTestCase):
                new=mock.Mock(return_value=range(pcpus)))
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
                new=mock.Mock(return_value=range(vcpus)))
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
                '_update_provider_tree_for_pcpu',
                new=mock.Mock())
    def _test_update_provider_tree(
            self, mock_gpu_invs, gpu_invs=None, vpmems=None):
        if gpu_invs:

@@ -19923,6 +19928,9 @@ class TestUpdateProviderTree(test.NoDBTestCase):
                new=mock.Mock(return_value=range(pcpus)))
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
                new=mock.Mock(return_value=range(vcpus)))
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
                '_update_provider_tree_for_pcpu',
                new=mock.Mock())
    def test_update_provider_tree_for_vgpu_reshape(
            self, mock_gpus, mock_get_devs, mock_get_mdev_info):
        """Tests the VGPU reshape scenario."""

@@ -19978,24 +19986,25 @@ class TestUpdateProviderTree(test.NoDBTestCase):
        allocations = {
            uuids.consumer1: {
                'allocations': {
                    # This consumer has ram and vgpu allocations on the root
                    # node provider and should be changed.
                    # This consumer has vGPU allocations on the root
                    # node provider and *should* be changed.
                    self.cn_rp['uuid']: {
                        'resources': {
                            orc.MEMORY_MB: 512,
                            orc.VGPU: 1
                            orc.VCPU: 2,
                            orc.VGPU: 1,
                        }
                    }
                }
            },
            uuids.consumer2: {
                'allocations': {
                    # This consumer has ram and vcpu allocations on the root
                    # node provider and should not be changed.
                    # This consumer has no vGPU allocations on the root
                    # node provider and *should not* be changed.
                    self.cn_rp['uuid']: {
                        'resources': {
                            orc.MEMORY_MB: 256,
                            orc.VCPU: 2
                            orc.VCPU: 2,
                        }
                    }
                }

@@ -20031,7 +20040,7 @@ class TestUpdateProviderTree(test.NoDBTestCase):
        # provider.
        consumer1_allocs = allocations[uuids.consumer1]['allocations']
        self.assertEqual(2, len(consumer1_allocs))
        self.assertEqual({orc.MEMORY_MB: 512},
        self.assertEqual({orc.MEMORY_MB: 512, orc.VCPU: 2},
                         consumer1_allocs[self.cn_rp['uuid']]['resources'])
        # Make sure the VGPU allocation moved to the corresponding child RP
        self.assertEqual(

@@ -20112,6 +20121,220 @@ class TestUpdateProviderTree(test.NoDBTestCase):
        self.assertIn('Unexpected VGPU resource allocation on provider %s'
                      % uuids.other_rp, six.text_type(ex))

    @mock.patch('nova.objects.instance.Instance.get_by_uuid')
    @mock.patch('nova.objects.migration.MigrationList'
                '.get_in_progress_by_host_and_node')
    @mock.patch('nova.objects.instance.InstanceList.get_by_host')
    @mock.patch('nova.objects.compute_node.ComputeNode.get_by_nodename')
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
                '_update_provider_tree_for_vgpu',
                new=mock.Mock())
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_cpu_traits',
                new=mock.Mock(return_value=cpu_traits))
    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
                new=mock.Mock(return_value={'total': disk_gb}))
    @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
                new=mock.Mock(return_value=memory_mb))
    @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus',
                new=mock.Mock(return_value=range(pcpus + vcpus)))
    def test_update_provider_tree_for_pcpu_reshape(self,
            mock_get_cn, mock_get_instances, mock_get_migrations,
            mock_get_instance):
        """Tests the CPU reshape scenario."""

        # configure the 'cpu_dedicated_set' and 'cpu_shared_set' fields to
        # something useful. Note that because we're setting this, we haven't
        # mocked out '_get_pcpu_available' and '_get_vcpu_available' but
        # instead have mocked out 'get_online_cpus'. This isn't very "unit
        # testy" but it's a more realistic test

        self.flags(cpu_shared_set='0-11,18-29',
                   cpu_dedicated_set='12-17,30-35',
                   group='compute')

        # define a host topology with a single NUMA cell and four cores

        numa_topology = objects.NUMATopology(cells=[
            objects.NUMACell(
                id=0,
                cpuset=set(range(0, 12)) | set(range(18, 30)),
                pcpuset=set(range(12, 18)) | set(range(30, 36)),
                memory=8192,
                cpu_usage=6,
                memory_usage=0,
                mempages=[],
                siblings=[],  # no hyperthreading
                pinned_cpus=set([2, 3]))])

        cn = objects.ComputeNode(
            uuid=self.cn_rp['uuid'],
            host='host1',
            hypervisor_hostname=self.cn_rp['name'],
            numa_topology=numa_topology._to_json(),
        )

        # define three instances, one with a NUMA topology but no pinning, one
        # with pinning (and an implicit NUMA topology), and one with neither
        # pinning nor a NUMA topology. In practice, this shouldn't happen since
        # they should all be on separate hosts, but since we don't enforce that
        # we can't rely on it

        base_instance = _create_test_instance()
        base_instance.pop('id')
        base_instance.pop('uuid')

        instance_a = objects.Instance(
            id=1, uuid=uuids.instance_a, **base_instance)
        instance_a.numa_topology = objects.InstanceNUMATopology(cells=[
            objects.InstanceNUMACell(
                id=0,
                cpuset=set([0, 1]),
                memory=1024)])

        instance_b = objects.Instance(
            id=2, uuid=uuids.instance_b, **base_instance)
        instance_b.numa_topology = objects.InstanceNUMATopology(cells=[
            objects.InstanceNUMACell(
                id=0,
                cpuset=set([0, 1]),
                cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
                cpu_pinning={0: 2, 1: 3},
                memory=1024)])

        instance_c = objects.Instance(
            id=3, uuid=uuids.instance_c, **base_instance)
        instance_c.numa_topology = None

        instances = objects.InstanceList(objects=[
            instance_a, instance_b, instance_c])

        instance_d = objects.Instance(
            id=4, uuid=uuids.instance_d, **base_instance)
        instance_d.numa_topology = objects.InstanceNUMATopology(cells=[
            objects.InstanceNUMACell(
                id=0,
                cpuset=set([0, 1]),
                cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
                cpu_pinning={0: 0, 1: 1},
                memory=1024)])

        migration = objects.Migration(
            id=42,
            uuid=uuids.migration,
            source_compute=cn.host,
            dest_compute='host2',
            instance_uuid=instance_d.uuid)

        migrations = objects.MigrationList(objects=[migration])

        # use the ComputeNode and InstanceList objects in our mocks

        mock_get_cn.return_value = cn
        mock_get_instances.return_value = instances

        # ditto for the migration and corresponding instance (which is
        # theoretically on another host now, but still has allocation here)

        mock_get_instance.return_value = instance_d
        mock_get_migrations.return_value = migrations

        # mock the inventory of a pre-Train compute node, where PCPUs have not
        # yet been reported

        initial_inventory = self._get_inventory()
        expected_inventory = copy.copy(initial_inventory)

        initial_inventory.pop(orc.PCPU)
        self.pt.update_inventory(cn.uuid, initial_inventory)

        # call 'update_provider_tree' to ensure it raises 'ReshapeNeeded'
        # since there is a reshape needed and no allocations provided

        with testtools.ExpectedException(exception.ReshapeNeeded):
            self.driver.update_provider_tree(
                self.pt, cn.hypervisor_hostname)

        # now prepare the allocations, which are all using VCPUs since we
        # haven't reshaped anything yet

        allocations = {
            uuids.instance_a: {
                'allocations': {
                    cn.uuid: {
                        'resources': {
                            orc.MEMORY_MB: 1000,
                            orc.VCPU: 2,
                            orc.DISK_GB: 30,
                        }
                    }
                }
            },
            uuids.instance_b: {
                'allocations': {
                    cn.uuid: {
                        'resources': {
                            orc.MEMORY_MB: 1000,
                            orc.VCPU: 2,
                            orc.DISK_GB: 30,
                        }
                    }
                }
            },
            uuids.instance_c: {
                'allocations': {
                    cn.uuid: {
                        'resources': {
                            orc.MEMORY_MB: 1000,
                            orc.VCPU: 2,
                            orc.DISK_GB: 30,
                        }
                    }
                }
            },
            uuids.migration: {
                'allocations': {
                    cn.uuid: {
                        'resources': {
                            orc.MEMORY_MB: 1000,
                            orc.VCPU: 2,
                            orc.DISK_GB: 30,
                        }
                    }
                }
            },
        }

        # post reshape, only the allocations for instance_b and the migration
        # should change since those are the only instances (by way of
        # instance_d in the case of the migration) with CPU pinning

        expected_allocations = copy.deepcopy(allocations)
        expected_allocations[uuids.instance_b]['allocations'][cn.uuid] = {
            'resources': {
                orc.MEMORY_MB: 1000,
                orc.PCPU: 2,
                orc.DISK_GB: 30,
            }
        }
        expected_allocations[uuids.migration]['allocations'][cn.uuid] = {
            'resources': {
                orc.MEMORY_MB: 1000,
                orc.PCPU: 2,
                orc.DISK_GB: 30,
            }
        }

        # initiate the reshape

        self.driver.update_provider_tree(
            self.pt, cn.hypervisor_hostname, allocations=allocations)

        # check both the VCPU and PCPU inventory are now reported and the
        # allocations have been updated

        self.assertEqual(expected_inventory, self.pt.data(cn.uuid).inventory)
        self.assertEqual(expected_allocations, allocations)


class TraitsComparisonMixin(object):

@@ -7301,6 +7301,9 @@ class LibvirtDriver(driver.ComputeDriver):
        self._update_provider_tree_for_vgpu(
            provider_tree, nodename, allocations=allocations)

        self._update_provider_tree_for_pcpu(
            provider_tree, nodename, allocations=allocations)

        self._update_provider_tree_for_vpmems(
            provider_tree, nodename, result, resources)

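The new `_update_provider_tree_for_pcpu` helper below signals that a reshape is required by raising `ReshapeNeeded` when it is called without allocations; per its docstring, a second call with allocations is then expected to actually perform the move. For orientation, the caller-side retry pattern looks roughly like the following. This is a simplified sketch under stated assumptions, not the actual nova resource tracker code, and `fetch_allocations_for_tree` is a hypothetical stand-in for however the caller obtains the current allocations from placement.

# Simplified sketch (not the actual nova resource tracker code): how a
# caller is expected to react when the driver raises ReshapeNeeded.
# 'fetch_allocations_for_tree' is a hypothetical stand-in for however the
# caller pulls the current allocations out of placement.
from nova import exception


def update_placement(driver, provider_tree, nodename,
                     fetch_allocations_for_tree):
    try:
        driver.update_provider_tree(provider_tree, nodename)
    except exception.ReshapeNeeded:
        # Retry with allocations so the driver can move pinned instances'
        # VCPU allocations over to PCPU in a single reshape operation.
        allocations = fetch_allocations_for_tree(provider_tree)
        driver.update_provider_tree(
            provider_tree, nodename, allocations=allocations)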
@@ -7709,6 +7712,151 @@ class LibvirtDriver(driver.ComputeDriver):
            del root_node.inventory[orc.VGPU]
            provider_tree.update_inventory(nodename, root_node.inventory)

    def _update_provider_tree_for_pcpu(self, provider_tree, nodename,
                                       allocations=None):
        """Updates the provider tree for PCPU inventory.

        Before Train, pinned instances consumed VCPU inventory just like
        unpinned instances. Starting in Train, these instances now consume PCPU
        inventory. The function can reshape the inventory, changing allocations
        of VCPUs to PCPUs.

        :param provider_tree: The ProviderTree to update.
        :param nodename: The ComputeNode.hypervisor_hostname, also known as
            the name of the root node provider in the tree for this host.
        :param allocations: A dict, keyed by consumer UUID, of allocation
            records, or None::

                {
                    $CONSUMER_UUID: {
                        "allocations": {
                            $RP_UUID: {
                                "generation": $RP_GEN,
                                "resources": {
                                    $RESOURCE_CLASS: $AMOUNT,
                                    ...
                                },
                            },
                            ...
                        },
                        "project_id": $PROJ_ID,
                        "user_id": $USER_ID,
                        "consumer_generation": $CONSUMER_GEN,
                    },
                    ...
                }

            If provided, this indicates a reshape was requested and should be
            performed.
        :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
            the method determines a reshape of the tree is needed, i.e. VCPU
            inventory and allocations must be migrated to PCPU resources.
        :raises: nova.exception.ReshapeFailed if the requested tree reshape
            fails for whatever reason.
        """
        # If we're not configuring PCPUs, then we've nothing to worry about
        # (yet)
        if not CONF.compute.cpu_dedicated_set:
            return

        root_node = provider_tree.data(nodename)

        # Similarly, if PCPU inventories are already reported then there is no
        # need to reshape
        if orc.PCPU in root_node.inventory:
            return

        ctx = nova_context.get_admin_context()
        compute_node = objects.ComputeNode.get_by_nodename(ctx, nodename)

        # Finally, if the compute node doesn't appear to support NUMA, move
        # swiftly on
        if not compute_node.numa_topology:
            return

        # The ComputeNode.numa_topology is a StringField, deserialize
        numa = objects.NUMATopology.obj_from_db_obj(compute_node.numa_topology)

        # If the host doesn't know of any pinned CPUs, we can continue
        if not any(cell.pinned_cpus for cell in numa.cells):
            return

        # At this point, we know there's something to be migrated here but not
        # how much. If the allocations are None, we're at the startup of the
        # compute node and a Reshape is needed. Indicate this by raising the
        # ReshapeNeeded exception

        if allocations is None:
            LOG.info(
                'Requesting provider tree reshape in order to move '
                'VCPU to PCPU allocations to the compute node '
                'provider %s', nodename)
            raise exception.ReshapeNeeded()

        # Go figure out how many VCPUs to migrate to PCPUs. We've been telling
        # people for years *not* to mix pinned and unpinned instances, meaning
        # we should be able to move all VCPUs to PCPUs, but we never actually
        # enforced this in code and there's an all-too-high chance someone
        # didn't get the memo

        allocations_needing_reshape = []

        # we need to tackle the allocations against instances on this host...

        instances = objects.InstanceList.get_by_host(
            ctx, compute_node.host, expected_attrs=['numa_topology'])
        for instance in instances:
            if not instance.numa_topology:
                continue

            if not instance.numa_topology.cpu_pinning_requested:
                continue

            allocations_needing_reshape.append(instance.uuid)

        # ...and those for any migrations

        migrations = objects.MigrationList.get_in_progress_by_host_and_node(
            ctx, compute_node.host, compute_node.hypervisor_hostname)
        for migration in migrations:
            # we don't care about migrations that have landed here, since we
            # already have those instances above
            if not migration.dest_compute or (
                    migration.dest_compute == compute_node.host):
                continue

            instance = objects.Instance.get_by_uuid(
                ctx, migration.instance_uuid, expected_attrs=['numa_topology'])

            if not instance.numa_topology:
                continue

            if not instance.numa_topology.cpu_pinning_requested:
                continue

            allocations_needing_reshape.append(migration.uuid)

        for allocation_uuid in allocations_needing_reshape:
            consumer_allocations = allocations.get(allocation_uuid, {}).get(
                'allocations', {})
            # TODO(stephenfin): We can probably just check the allocations for
            # ComputeNode.uuid since compute nodes are the only (?) provider of
            # VCPU and PCPU resources
            for rp_uuid in consumer_allocations:
                resources = consumer_allocations[rp_uuid]['resources']

                if orc.PCPU in resources or orc.VCPU not in resources:
                    # Either this has been migrated or it's not a compute node
                    continue

                # Switch stuff around. We can do a straight swap since an
                # instance is either pinned or unpinned. By doing this, we're
                # modifying the provided 'allocations' dict, which will
                # eventually be used by the resource tracker to update
                # placement
                resources['PCPU'] = resources['VCPU']
                del resources[orc.VCPU]

    def get_available_resource(self, nodename):
        """Retrieve resource information.