From 0b942dcaa5db81f927d1a300cb839b4e6c4d1c5e Mon Sep 17 00:00:00 2001
From: melanie witt
Date: Fri, 23 Oct 2020 04:02:08 +0000
Subject: [PATCH] Omit resource inventories from placement update if zero

When a compute node has zero total available for any of the following
resource classes:

* MEMORY_MB
* DISK_GB
* VGPU
* PMEM_NAMESPACE_*

we attempt to PUT an inventory with 'total' of 0, which isn't allowed
by the placement API. Doing this results in a 400 error from placement
("JSON does not validate: 0 is less than the minimum of 1") and in
ResourceProviderUpdateFailed and ResourceProviderSyncFailed being
raised in nova.

We already omit most resource classes when the total amount of the
resource is 0; we just need to do the same for the resource classes
listed above.

Closes-Bug: #1901120
Closes-Bug: #1906494
Change-Id: I022f3bbddbbdc24362b10004f273da2421788c97
---
 nova/tests/unit/virt/libvirt/test_driver.py | 48 +++++++++++++++++++++
 nova/virt/libvirt/driver.py                 | 37 ++++++++++------
 2 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index cd0ee398c782..6ac046396008 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -21041,6 +21041,54 @@ class TestUpdateProviderTree(test.NoDBTestCase):
         for trait in ['HW_CPU_X86_AVX512F', 'HW_CPU_X86_BMI']:
             self.assertIn(trait, self.pt.data(self.cn_rp['uuid']).traits)
 
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                '_get_cpu_feature_traits',
+                new=mock.Mock(return_value=cpu_traits))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
+                new=mock.Mock(return_value={'total': 0}))
+    @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
+                new=mock.Mock(return_value=0))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_available',
+                new=mock.Mock(return_value=range(0)))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
+                new=mock.Mock(return_value=range(0)))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                '_update_provider_tree_for_pcpu',
+                new=mock.Mock())
+    def test_update_provider_tree_zero_total(self, mock_gpu_invs):
+        # Verify that we omit resources from the inventory when the total
+        # quantity of those resources is zero. Placement does not allow
+        # inventory updates with total=0 as they fail API schema validation.
+
+        # Use total=0 for vgpus.
+        gpu_inventory_dicts = {
+            'pci_0000_06_00_0': {'total': 0,
+                                 'max_unit': 16,
+                                 'min_unit': 1,
+                                 'step_size': 1,
+                                 'reserved': 0,
+                                 'allocation_ratio': 1.0,
+                                 },
+        }
+        mock_gpu_invs.return_value = gpu_inventory_dicts
+        # Use an empty list for vpmems.
+        self.driver._vpmems_by_rc = {'CUSTOM_PMEM_NAMESPACE_4GB': []}
+        # Before we update_provider_tree, we have 2 providers from setUp():
+        # self.cn_rp and self.shared_rp and they are both empty {}.
+        self.assertEqual(2, len(self.pt.get_provider_uuids()))
+        # Update the provider tree.
+        self.driver.update_provider_tree(self.pt, self.cn_rp['name'])
+        # After we update_provider_tree, we should still have 2 providers
+        # because VGPU has total=0 and we would skip adding a child provider
+        # for it.
+        self.assertEqual(2, len(self.pt.get_provider_uuids()))
+        # All providers should have an empty inventory dict because (1) we
+        # never updated the self.shared_rp provider and (2) self.cn_rp has
+        # zero totals for all of its resources.
+ for uuid in self.pt.get_provider_uuids(): + self.assertEqual({}, self.pt.data(uuid).inventory) + def test_update_provider_tree_with_vgpus(self): pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0'] gpu_inventory_dicts = { diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 78a47d8165a4..cc01a4549027 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7702,16 +7702,17 @@ class LibvirtDriver(driver.ComputeDriver): resources: ty.Dict[str, ty.Set['objects.Resource']] = ( collections.defaultdict(set) ) - result = { - orc.MEMORY_MB: { + + result = {} + if memory_mb: + result[orc.MEMORY_MB] = { 'total': memory_mb, 'min_unit': 1, 'max_unit': memory_mb, 'step_size': 1, 'allocation_ratio': ratios[orc.MEMORY_MB], 'reserved': CONF.reserved_host_memory_mb, - }, - } + } # NOTE(stephenfin): We have to optionally report these since placement # forbids reporting inventory with total=0 @@ -7752,15 +7753,17 @@ class LibvirtDriver(driver.ComputeDriver): # compute RP once the issues from bug #1784020 have been resolved. if provider_tree.has_sharing_provider(orc.DISK_GB): LOG.debug('Ignoring sharing provider - see bug #1784020') - result[orc.DISK_GB] = { - 'total': disk_gb, - 'min_unit': 1, - 'max_unit': disk_gb, - 'step_size': 1, - 'allocation_ratio': ratios[orc.DISK_GB], - 'reserved': (self._get_reserved_host_disk_gb_from_config() + - self._get_disk_size_reserved_for_image_cache()), - } + + if disk_gb: + result[orc.DISK_GB] = { + 'total': disk_gb, + 'min_unit': 1, + 'max_unit': disk_gb, + 'step_size': 1, + 'allocation_ratio': ratios[orc.DISK_GB], + 'reserved': (self._get_reserved_host_disk_gb_from_config() + + self._get_disk_size_reserved_for_image_cache()), + } # TODO(sbauza): Use traits to providing vGPU types. For the moment, # it will be only documentation support by explaining to use @@ -7795,6 +7798,10 @@ class LibvirtDriver(driver.ComputeDriver): """Update resources and inventory for vpmems in provider tree.""" prov_data = provider_tree.data(nodename) for rc, vpmems in self._vpmems_by_rc.items(): + # Skip (and omit) inventories with total=0 because placement does + # not allow setting total=0 for inventory. + if not len(vpmems): + continue inventory[rc] = { 'total': len(vpmems), 'max_unit': len(vpmems), @@ -7907,6 +7914,10 @@ class LibvirtDriver(driver.ComputeDriver): # Dict of PGPU RPs keyed by their libvirt PCI name pgpu_rps = {} for pgpu_dev_id, inventory in inventories_dict.items(): + # Skip (and omit) inventories with total=0 because placement does + # not allow setting total=0 for inventory. + if not inventory['total']: + continue # For each physical GPU, we make sure to have a child provider pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id) if not provider_tree.exists(pgpu_rp_name):
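
The driver-side hunks above all apply the same rule: never report an
inventory entry whose 'total' is 0, because placement's inventory schema
requires total >= 1 ("0 is less than the minimum of 1"). Below is a
minimal, standalone Python sketch of that filtering rule for illustration
only; the helper name filter_zero_total_inventories and the sample data
are made up and are not part of nova or of this patch.

    import typing as ty


    def filter_zero_total_inventories(
        inventories: ty.Dict[str, ty.Dict[str, int]],
    ) -> ty.Dict[str, ty.Dict[str, int]]:
        """Drop resource classes whose inventory reports total=0.

        Placement rejects inventory payloads containing an entry with
        total=0, so such entries must be omitted rather than reported.
        """
        return {rc: inv for rc, inv in inventories.items()
                if inv.get('total')}


    if __name__ == '__main__':
        reported = filter_zero_total_inventories({
            'MEMORY_MB': {'total': 0, 'min_unit': 1},  # omitted (total=0)
            'VCPU': {'total': 8, 'min_unit': 1},       # kept
            'VGPU': {'total': 0, 'min_unit': 1},       # omitted (total=0)
        })
        assert list(reported) == ['VCPU']
        print(reported)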