From 0b942dcaa5db81f927d1a300cb839b4e6c4d1c5e Mon Sep 17 00:00:00 2001
From: melanie witt
Date: Fri, 23 Oct 2020 04:02:08 +0000
Subject: [PATCH] Omit resource inventories from placement update if zero

When a compute node has zero total available for any of the following
resource classes:

* MEMORY_MB
* DISK_GB
* VGPU
* PMEM_NAMESPACE_*

we attempt to PUT an inventory with 'total' of 0, which isn't allowed
by the placement API. Doing this results in a 400 error from placement
("JSON does not validate: 0 is less than the minimum of 1") and in
ResourceProviderUpdateFailed and ResourceProviderSyncFailed being
raised in nova.

We already omit most resource classes when the total amount of the
resource is 0; we just need to do the same for the resource classes
listed above.

Closes-Bug: #1901120
Closes-Bug: #1906494
Change-Id: I022f3bbddbbdc24362b10004f273da2421788c97
---
 nova/tests/unit/virt/libvirt/test_driver.py | 48 +++++++++++++++++++++
 nova/virt/libvirt/driver.py                 | 37 ++++++++++------
 2 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index cd0ee398c782..6ac046396008 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -21041,6 +21041,54 @@ class TestUpdateProviderTree(test.NoDBTestCase):
         for trait in ['HW_CPU_X86_AVX512F', 'HW_CPU_X86_BMI']:
             self.assertIn(trait, self.pt.data(self.cn_rp['uuid']).traits)
 
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                '_get_cpu_feature_traits',
+                new=mock.Mock(return_value=cpu_traits))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
+                new=mock.Mock(return_value={'total': 0}))
+    @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
+                new=mock.Mock(return_value=0))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_available',
+                new=mock.Mock(return_value=range(0)))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_available',
+                new=mock.Mock(return_value=range(0)))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                '_update_provider_tree_for_pcpu',
+                new=mock.Mock())
+    def test_update_provider_tree_zero_total(self, mock_gpu_invs):
+        # Verify that we omit resources from the inventory when the total
+        # quantity of those resources is zero. Placement does not allow
+        # inventory updates with total=0 as they fail API schema validation.
+
+        # Use total=0 for vgpus.
+        gpu_inventory_dicts = {
+            'pci_0000_06_00_0': {'total': 0,
+                                 'max_unit': 16,
+                                 'min_unit': 1,
+                                 'step_size': 1,
+                                 'reserved': 0,
+                                 'allocation_ratio': 1.0,
+                                 },
+        }
+        mock_gpu_invs.return_value = gpu_inventory_dicts
+        # Use an empty list for vpmems.
+        self.driver._vpmems_by_rc = {'CUSTOM_PMEM_NAMESPACE_4GB': []}
+        # Before we update_provider_tree, we have 2 providers from setUp():
+        # self.cn_rp and self.shared_rp and they are both empty {}.
+        self.assertEqual(2, len(self.pt.get_provider_uuids()))
+        # Update the provider tree.
+        self.driver.update_provider_tree(self.pt, self.cn_rp['name'])
+        # After we update_provider_tree, we should still have 2 providers
+        # because VGPU has total=0 and we would skip adding a child provider
+        # for it.
+        self.assertEqual(2, len(self.pt.get_provider_uuids()))
+        # All providers should have an empty inventory dict because (1) we
+        # never updated the self.shared_rp provider and (2) self.cn_rp has
+        # zero totals for all of its resources.
+ for uuid in self.pt.get_provider_uuids(): + self.assertEqual({}, self.pt.data(uuid).inventory) + def test_update_provider_tree_with_vgpus(self): pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0'] gpu_inventory_dicts = { diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 78a47d8165a4..cc01a4549027 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7702,16 +7702,17 @@ class LibvirtDriver(driver.ComputeDriver): resources: ty.Dict[str, ty.Set['objects.Resource']] = ( collections.defaultdict(set) ) - result = { - orc.MEMORY_MB: { + + result = {} + if memory_mb: + result[orc.MEMORY_MB] = { 'total': memory_mb, 'min_unit': 1, 'max_unit': memory_mb, 'step_size': 1, 'allocation_ratio': ratios[orc.MEMORY_MB], 'reserved': CONF.reserved_host_memory_mb, - }, - } + } # NOTE(stephenfin): We have to optionally report these since placement # forbids reporting inventory with total=0 @@ -7752,15 +7753,17 @@ class LibvirtDriver(driver.ComputeDriver): # compute RP once the issues from bug #1784020 have been resolved. if provider_tree.has_sharing_provider(orc.DISK_GB): LOG.debug('Ignoring sharing provider - see bug #1784020') - result[orc.DISK_GB] = { - 'total': disk_gb, - 'min_unit': 1, - 'max_unit': disk_gb, - 'step_size': 1, - 'allocation_ratio': ratios[orc.DISK_GB], - 'reserved': (self._get_reserved_host_disk_gb_from_config() + - self._get_disk_size_reserved_for_image_cache()), - } + + if disk_gb: + result[orc.DISK_GB] = { + 'total': disk_gb, + 'min_unit': 1, + 'max_unit': disk_gb, + 'step_size': 1, + 'allocation_ratio': ratios[orc.DISK_GB], + 'reserved': (self._get_reserved_host_disk_gb_from_config() + + self._get_disk_size_reserved_for_image_cache()), + } # TODO(sbauza): Use traits to providing vGPU types. For the moment, # it will be only documentation support by explaining to use @@ -7795,6 +7798,10 @@ class LibvirtDriver(driver.ComputeDriver): """Update resources and inventory for vpmems in provider tree.""" prov_data = provider_tree.data(nodename) for rc, vpmems in self._vpmems_by_rc.items(): + # Skip (and omit) inventories with total=0 because placement does + # not allow setting total=0 for inventory. + if not len(vpmems): + continue inventory[rc] = { 'total': len(vpmems), 'max_unit': len(vpmems), @@ -7907,6 +7914,10 @@ class LibvirtDriver(driver.ComputeDriver): # Dict of PGPU RPs keyed by their libvirt PCI name pgpu_rps = {} for pgpu_dev_id, inventory in inventories_dict.items(): + # Skip (and omit) inventories with total=0 because placement does + # not allow setting total=0 for inventory. + if not inventory['total']: + continue # For each physical GPU, we make sure to have a child provider pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id) if not provider_tree.exists(pgpu_rp_name):
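
The driver-side hunks above all apply the same rule: never report an
inventory entry whose 'total' is 0, because placement's inventory schema
requires total >= 1 ("0 is less than the minimum of 1"). Below is a
minimal, standalone Python sketch of that filtering rule for illustration
only; the helper name filter_zero_total_inventories and the sample data
are made up and are not part of nova or of this patch.

    import typing as ty


    def filter_zero_total_inventories(
        inventories: ty.Dict[str, ty.Dict[str, int]],
    ) -> ty.Dict[str, ty.Dict[str, int]]:
        """Drop resource classes whose inventory reports total=0.

        Placement rejects inventory payloads containing an entry with
        total=0, so such entries must be omitted rather than reported.
        """
        return {rc: inv for rc, inv in inventories.items()
                if inv.get('total')}


    if __name__ == '__main__':
        reported = filter_zero_total_inventories({
            'MEMORY_MB': {'total': 0, 'min_unit': 1},  # omitted (total=0)
            'VCPU': {'total': 8, 'min_unit': 1},       # kept
            'VGPU': {'total': 0, 'min_unit': 1},       # omitted (total=0)
        })
        assert list(reported) == ['VCPU']
        print(reported)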