Omit resource inventories from placement update if zero

When a compute node has zero total available for the:

  * MEMORY_MB
  * DISK_GB
  * VGPU
  * PMEM_NAMESPACE_*

resource classes, we attempt to PUT an inventory with a 'total' of 0,
which isn't allowed by the placement API. Doing this results in a 400
error from placement ("JSON does not validate: 0 is less than the
minimum of 1") and in ResourceProviderUpdateFailed and
ResourceProviderSyncFailed being raised in nova.

We already omit most resource classes when their total is 0; we just
need to do the same for the resource classes listed above.

Closes-Bug: #1901120
Closes-Bug: #1906494

Change-Id: I022f3bbddbbdc24362b10004f273da2421788c97
This commit is contained in:
melanie witt 2020-10-23 04:02:08 +00:00
parent 6a5e158756
commit 0b942dcaa5
2 changed files with 72 additions and 13 deletions

View File

@ -21041,6 +21041,54 @@ class TestUpdateProviderTree(test.NoDBTestCase):
for trait in ['HW_CPU_X86_AVX512F', 'HW_CPU_X86_BMI']:
self.assertIn(trait, self.pt.data(self.cn_rp['uuid']).traits)
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories')
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
            '_get_cpu_feature_traits',
            new=mock.Mock(return_value=cpu_traits))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
            new=mock.Mock(return_value={'total': 0}))
@mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
            new=mock.Mock(return_value=0))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_available',
            new=mock.Mock(return_value=range(0)))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
            new=mock.Mock(return_value=range(0)))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
            '_update_provider_tree_for_pcpu',
            new=mock.Mock())
def test_update_provider_tree_zero_total(self, mock_gpu_invs):
    # Resources whose total quantity is zero must be left out of the
    # reported inventory: placement rejects inventory updates carrying
    # total=0 because they fail its API schema validation.
    #
    # The decorators above zero out disk, memory, PCPU and VCPU. The two
    # remaining resource kinds are zeroed out here.

    # VGPU: a single physical GPU whose inventory has total=0.
    zero_vgpu_inventory = {
        'total': 0,
        'max_unit': 16,
        'min_unit': 1,
        'step_size': 1,
        'reserved': 0,
        'allocation_ratio': 1.0,
    }
    mock_gpu_invs.return_value = {'pci_0000_06_00_0': zero_vgpu_inventory}

    # VPMEM: a resource class with no namespaces behind it.
    self.driver._vpmems_by_rc = {'CUSTOM_PMEM_NAMESPACE_4GB': []}

    # setUp() leaves the tree with exactly two providers, self.cn_rp and
    # self.shared_rp, both with empty inventory.
    providers_before = self.pt.get_provider_uuids()
    self.assertEqual(2, len(providers_before))

    self.driver.update_provider_tree(self.pt, self.cn_rp['name'])

    # Still two providers afterwards: with total=0 for VGPU, no child
    # provider should have been created for the physical GPU.
    providers_after = self.pt.get_provider_uuids()
    self.assertEqual(2, len(providers_after))

    # Every inventory must be an empty dict: (1) self.shared_rp was never
    # updated and (2) the compute node provider has zero for all of its
    # resource totals.
    for rp_uuid in providers_after:
        self.assertEqual({}, self.pt.data(rp_uuid).inventory)
def test_update_provider_tree_with_vgpus(self):
pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0']
gpu_inventory_dicts = {

View File

@ -7702,16 +7702,17 @@ class LibvirtDriver(driver.ComputeDriver):
resources: ty.Dict[str, ty.Set['objects.Resource']] = (
collections.defaultdict(set)
)
result = {
orc.MEMORY_MB: {
result = {}
if memory_mb:
result[orc.MEMORY_MB] = {
'total': memory_mb,
'min_unit': 1,
'max_unit': memory_mb,
'step_size': 1,
'allocation_ratio': ratios[orc.MEMORY_MB],
'reserved': CONF.reserved_host_memory_mb,
},
}
}
# NOTE(stephenfin): We have to optionally report these since placement
# forbids reporting inventory with total=0
@ -7752,15 +7753,17 @@ class LibvirtDriver(driver.ComputeDriver):
# compute RP once the issues from bug #1784020 have been resolved.
if provider_tree.has_sharing_provider(orc.DISK_GB):
LOG.debug('Ignoring sharing provider - see bug #1784020')
result[orc.DISK_GB] = {
'total': disk_gb,
'min_unit': 1,
'max_unit': disk_gb,
'step_size': 1,
'allocation_ratio': ratios[orc.DISK_GB],
'reserved': (self._get_reserved_host_disk_gb_from_config() +
self._get_disk_size_reserved_for_image_cache()),
}
if disk_gb:
result[orc.DISK_GB] = {
'total': disk_gb,
'min_unit': 1,
'max_unit': disk_gb,
'step_size': 1,
'allocation_ratio': ratios[orc.DISK_GB],
'reserved': (self._get_reserved_host_disk_gb_from_config() +
self._get_disk_size_reserved_for_image_cache()),
}
# TODO(sbauza): Use traits to providing vGPU types. For the moment,
# it will be only documentation support by explaining to use
@ -7795,6 +7798,10 @@ class LibvirtDriver(driver.ComputeDriver):
"""Update resources and inventory for vpmems in provider tree."""
prov_data = provider_tree.data(nodename)
for rc, vpmems in self._vpmems_by_rc.items():
# Skip (and omit) inventories with total=0 because placement does
# not allow setting total=0 for inventory.
if not len(vpmems):
continue
inventory[rc] = {
'total': len(vpmems),
'max_unit': len(vpmems),
@ -7907,6 +7914,10 @@ class LibvirtDriver(driver.ComputeDriver):
# Dict of PGPU RPs keyed by their libvirt PCI name
pgpu_rps = {}
for pgpu_dev_id, inventory in inventories_dict.items():
# Skip (and omit) inventories with total=0 because placement does
# not allow setting total=0 for inventory.
if not inventory['total']:
continue
# For each physical GPU, we make sure to have a child provider
pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id)
if not provider_tree.exists(pgpu_rp_name):