Report client: _reshape helper, placement min bump

Add a thin wrapper to invoke the POST /reshaper placement API with
appropriate error checking. This bumps the minimum required placement
microversion to 1.30, the reshaper microversion.

Change-Id: Idf8997d5efdfdfca6967899a0882ffb9ecf96915
blueprint: reshape-provider-tree
Eric Fried 2018-07-23 14:22:27 -05:00
parent 25b852efd7
commit 2833785f59
6 changed files with 203 additions and 97 deletions
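For orientation, a minimal usage sketch of the new _reshape wrapper. This is
illustrative only: it assumes an already-constructed SchedulerReportClient
("client") and a RequestContext ("ctx"), and the UUIDs and amounts are
placeholders; the payload shapes follow the _reshape docstring in the diff
below.

    import uuid

    from nova import exception

    rp_uuid = str(uuid.uuid4())        # placeholder resource provider UUID
    consumer_uuid = str(uuid.uuid4())  # placeholder instance (consumer) UUID

    inventories = {
        rp_uuid: {
            'inventories': {'VCPU': {'total': 8}},
            'resource_provider_generation': 3,
        },
    }
    allocations = {
        consumer_uuid: {
            'project_id': 'fake-project',
            'user_id': 'fake-user',
            'consumer_generation': 1,
            'allocations': {rp_uuid: {'resources': {'VCPU': 2}}},
        },
    }

    try:
        client._reshape(ctx, inventories, allocations)
    except exception.ReshapeFailed as err:
        # Placement rejected the POST /reshaper payload.
        print(err.kwargs['error'])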


@@ -118,6 +118,10 @@ Upgrade
* Checks that existing instances have been migrated to have a matching
request spec in the API DB.
**19.0.0 (Stein)**
* Checks for the Placement API are modified to require version 1.30.
See Also
========


@@ -57,11 +57,13 @@ changed or be partially complete at this time.
* `Request Traits During Scheduling`_
* `filter allocation candidates by aggregate membership`_
* `perform granular allocation candidate requests`_
* `inventory and allocation data migration`_ (reshaping provider trees)
.. _Nested Resource Providers: http://specs.openstack.org/openstack/nova-specs/specs/queens/approved/nested-resource-providers.html
.. _Request Traits During Scheduling: https://specs.openstack.org/openstack/nova-specs/specs/queens/approved/request-traits-in-nova.html
.. _filter allocation candidates by aggregate membership: https://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/alloc-candidates-member-of.html
.. _perform granular allocation candidate requests: http://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/granular-resource-requests.html
.. _inventory and allocation data migration: http://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/reshape-provider-tree.html
Deployment
==========


@@ -52,12 +52,11 @@ CONF = nova.conf.CONF
PLACEMENT_DOCS_LINK = 'https://docs.openstack.org/nova/latest' \
'/user/placement.html'
# NOTE(efried): 1.28 is required by "nova-manage placement heal_allocations"
# to get the consumer generation when updating incomplete allocations with
# instance consumer project_id and user_id values.
# NOTE(efried): 1.30 is required by nova-compute to support resource provider
# reshaping (inventory and allocation data migration).
# NOTE: If you bump this version, remember to update the history
# section in the nova-status man page (doc/source/cli/nova-status).
MIN_PLACEMENT_MICROVERSION = "1.28"
MIN_PLACEMENT_MICROVERSION = "1.30"
class UpgradeCheckCode(enum.IntEnum):


@@ -2378,3 +2378,8 @@ class ResourceProviderAllocationRetrievalFailed(NovaException):
class ConsumerAllocationRetrievalFailed(NovaException):
msg_fmt = _("Failed to retrieve allocations for consumer "
"%(consumer_uuid)s: %(error)s")
class ReshapeFailed(NovaException):
msg_fmt = _("Resource provider inventory and allocation data migration "
"failed: %(error)s")


@@ -47,6 +47,7 @@ _RE_INV_IN_USE = re.compile("Inventory for (.+) on resource provider "
"(.+) in use")
WARN_EVERY = 10
PLACEMENT_CLIENT_SEMAPHORE = 'placement_client'
RESHAPER_VERSION = '1.30'
CONSUMER_GENERATION_VERSION = '1.28'
GRANULAR_AC_VERSION = '1.25'
ALLOW_RESERVED_EQUAL_TOTAL_INVENTORY_VERSION = '1.26'
@@ -1420,6 +1421,40 @@ class SchedulerReportClient(object):
# when we invoke the DELETE. See bug #1746374.
self._update_inventory(context, compute_node.uuid, inv_data)
def _reshape(self, context, inventories, allocations):
"""Perform atomic inventory & allocation data migration.
:param context: The security context
:param inventories: A dict, keyed by resource provider UUID, of:
{ "inventories": { inventory dicts, keyed by resource class },
"resource_provider_generation": $RP_GEN }
:param allocations: A dict, keyed by consumer UUID, of:
{ "project_id": $PROJ_ID,
"user_id": $USER_ID,
"consumer_generation": $CONSUMER_GEN,
"allocations": {
$RP_UUID: {
"resources": { $RC: $AMOUNT, ... }
},
...
}
}
:return: The Response object representing a successful API call.
:raises: ReshapeFailed if the POST /reshaper request fails.
:raises: keystoneauth1.exceptions.ClientException if placement API
communication fails.
"""
# We have to make sure any new resource classes exist
for invs in inventories.values():
self._ensure_resource_classes(context, list(invs['inventories']))
payload = {"inventories": inventories, "allocations": allocations}
resp = self.post('/reshaper', payload, version=RESHAPER_VERSION,
global_request_id=context.global_id)
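# NOTE: an error response (4xx/5xx) evaluates False and is converted to
# ReshapeFailed below; keystoneauth1 exceptions raised by the call itself
# (e.g. no auth plugin configured) propagate to the caller unchanged.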
if not resp:
raise exception.ReshapeFailed(error=resp.text)
return resp
def update_from_provider_tree(self, context, new_tree):
"""Flush changes from a specified ProviderTree back to placement.


@@ -11,6 +11,7 @@
# License for the specific language governing permissions and limitations
# under the License.
from keystoneauth1 import exceptions as kse
import mock
import pkg_resources
@@ -1033,6 +1034,112 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
self.client.get_allocation_candidates(
self.context, utils.ResourceRequest())
def _set_up_provider_tree(self):
"""Create two compute nodes in placement: "this" one, and another one.
Must be invoked from within an _interceptor() context.
"""
# get_provider_tree_and_ensure_root creates a resource provider
# record for us
ptree = self.client.get_provider_tree_and_ensure_root(
self.context, self.compute_uuid, name=self.compute_name)
ptree.update_inventory(self.compute_uuid,
{'MEMORY_MB': {'total': 2048}})
ptree.update_aggregates(self.compute_uuid, [uuids.agg1])
# These are part of the compute node's tree
ptree.new_child('numa1', self.compute_uuid, uuid=uuids.numa1)
ptree.update_inventory('numa1', {'VCPU': {'total': 8},
'CUSTOM_PCPU': {'total': 8}})
ptree.new_child('numa2', self.compute_uuid, uuid=uuids.numa2)
ptree.update_inventory('numa2', {'VCPU': {'total': 8},
'CUSTOM_PCPU': {'total': 8}})
# A sharing provider that's not part of the compute node's tree.
# We avoid the report client's convenience methods to get bonus
# coverage of the subsequent update_from_provider_tree pulling it
# into the cache for us.
resp = self.client.post(
'/resource_providers',
{'uuid': uuids.ssp, 'name': 'ssp'}, version='1.20')
resp = self.client.put(
'/resource_providers/%s/inventories' % uuids.ssp,
{'inventories': {'DISK_GB': {'total': 500}},
'resource_provider_generation': resp.json()['generation']})
# Part of the shared storage aggregate
resp = self.client.put(
'/resource_providers/%s/aggregates' % uuids.ssp,
{'aggregates': [uuids.agg1],
'resource_provider_generation':
resp.json()['resource_provider_generation']},
version='1.19')
self.client.put(
'/resource_providers/%s/traits' % uuids.ssp,
{'traits': ['MISC_SHARES_VIA_AGGREGATE'],
'resource_provider_generation':
resp.json()['resource_provider_generation']})
self.client.update_from_provider_tree(self.context, ptree)
# Another unrelated compute node. We don't use the report client's
# convenience methods because we don't want this guy in the cache.
resp = self.client.post(
'/resource_providers',
{'uuid': uuids.othercn, 'name': 'othercn'}, version='1.20')
resp = self.client.put(
'/resource_providers/%s/inventories' % uuids.othercn,
{'inventories': {'VCPU': {'total': 8},
'MEMORY_MB': {'total': 1024}},
'resource_provider_generation': resp.json()['generation']})
# Part of the shared storage aggregate
self.client.put(
'/resource_providers/%s/aggregates' % uuids.othercn,
{'aggregates': [uuids.agg1],
'resource_provider_generation':
resp.json()['resource_provider_generation']},
version='1.19')
def _set_up_provider_tree_allocs(self):
"""Create some allocations on our compute (with sharing).
Must be invoked from within an _interceptor() context.
"""
cn_inst1_allocs = {
'allocations': {
self.compute_uuid: {'resources': {'MEMORY_MB': 512}},
uuids.numa1: {'resources': {'VCPU': 2, 'CUSTOM_PCPU': 2}},
uuids.ssp: {'resources': {'DISK_GB': 100}}
},
'consumer_generation': None,
'project_id': uuids.proj,
'user_id': uuids.user,
}
self.client.put('/allocations/' + uuids.cn_inst1, cn_inst1_allocs)
cn_inst2_allocs = {
'allocations': {
self.compute_uuid: {'resources': {'MEMORY_MB': 256}},
uuids.numa2: {'resources': {'CUSTOM_PCPU': 1}},
uuids.ssp: {'resources': {'DISK_GB': 50}}
},
'consumer_generation': None,
'project_id': uuids.proj,
'user_id': uuids.user,
}
self.client.put('/allocations/' + uuids.cn_inst2, cn_inst2_allocs)
# And on the other compute (with sharing)
self.client.put(
'/allocations/' + uuids.othercn_inst,
{'allocations': {
uuids.othercn: {'resources': {'VCPU': 2, 'MEMORY_MB': 64}},
uuids.ssp: {'resources': {'DISK_GB': 30}}
},
'consumer_generation': None,
'project_id': uuids.proj,
'user_id': uuids.user,
})
return cn_inst1_allocs, cn_inst2_allocs
def test_get_allocations_for_provider_tree(self):
with self._interceptor():
# When the provider tree cache is empty (or we otherwise supply a
@@ -1041,104 +1148,14 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
self.client.get_allocations_for_provider_tree,
self.context, 'bogus')
# get_provider_tree_and_ensure_root creates a resource provider
# record for us
ptree = self.client.get_provider_tree_and_ensure_root(
self.context, self.compute_uuid, name=self.compute_name)
ptree.update_inventory(self.compute_uuid,
{'MEMORY_MB': {'total': 2048}})
ptree.update_aggregates(self.compute_uuid, [uuids.agg1])
# These are part of the compute node's tree
ptree.new_child('numa1', self.compute_uuid, uuid=uuids.numa1)
ptree.update_inventory('numa1', {'VCPU': {'total': 8},
'CUSTOM_PCPU': {'total': 8}})
ptree.new_child('numa2', self.compute_uuid, uuid=uuids.numa2)
ptree.update_inventory('numa2', {'VCPU': {'total': 8},
'CUSTOM_PCPU': {'total': 8}})
# A sharing provider that's not part of the compute node's tree.
# We avoid the report client's convenience methods to get bonus
# coverage of the subsequent update_from_provider_tree pulling it
# into the cache for us.
resp = self.client.post(
'/resource_providers',
{'uuid': uuids.ssp, 'name': 'ssp'}, version='1.20')
resp = self.client.put(
'/resource_providers/%s/inventories' % uuids.ssp,
{'inventories': {'DISK_GB': {'total': 500}},
'resource_provider_generation': resp.json()['generation']})
# Part of the shared storage aggregate
resp = self.client.put(
'/resource_providers/%s/aggregates' % uuids.ssp,
{'aggregates': [uuids.agg1],
'resource_provider_generation':
resp.json()['resource_provider_generation']},
version='1.19')
self.client.put(
'/resource_providers/%s/traits' % uuids.ssp,
{'traits': ['MISC_SHARES_VIA_AGGREGATE'],
'resource_provider_generation':
resp.json()['resource_provider_generation']})
self.client.update_from_provider_tree(self.context, ptree)
# Another unrelated compute node. We don't use the report client's
# convenience methods because we don't want this guy in the cache.
resp = self.client.post(
'/resource_providers',
{'uuid': uuids.othercn, 'name': 'othercn'}, version='1.20')
resp = self.client.put(
'/resource_providers/%s/inventories' % uuids.othercn,
{'inventories': {'VCPU': {'total': 8},
'MEMORY_MB': {'total': 1024}},
'resource_provider_generation': resp.json()['generation']})
# Part of the shared storage aggregate
self.client.put(
'/resource_providers/%s/aggregates' % uuids.othercn,
{'aggregates': [uuids.agg1],
'resource_provider_generation':
resp.json()['resource_provider_generation']},
version='1.19')
self._set_up_provider_tree()
# At this point, there are no allocations
self.assertEqual({}, self.client.get_allocations_for_provider_tree(
self.context, self.compute_name))
# Create some allocations on our compute (with sharing)
cn_inst1_allocs = {
'allocations': {
self.compute_uuid: {'resources': {'MEMORY_MB': 512}},
uuids.numa1: {'resources': {'VCPU': 2, 'CUSTOM_PCPU': 2}},
uuids.ssp: {'resources': {'DISK_GB': 100}}
},
'consumer_generation': None,
'project_id': uuids.proj,
'user_id': uuids.user,
}
self.client.put('/allocations/' + uuids.cn_inst1, cn_inst1_allocs)
cn_inst2_allocs = {
'allocations': {
self.compute_uuid: {'resources': {'MEMORY_MB': 256}},
uuids.numa2: {'resources': {'CUSTOM_PCPU': 1}},
uuids.ssp: {'resources': {'DISK_GB': 50}}
},
'consumer_generation': None,
'project_id': uuids.proj,
'user_id': uuids.user,
}
self.client.put('/allocations/' + uuids.cn_inst2, cn_inst2_allocs)
# And on the other compute (with sharing)
self.client.put(
'/allocations/' + uuids.othercn_inst,
{'allocations': {
uuids.othercn: {'resources': {'VCPU': 2, 'MEMORY_MB': 64}},
uuids.ssp: {'resources': {'DISK_GB': 30}}
},
'consumer_generation': None,
'project_id': uuids.proj,
'user_id': uuids.user,
})
cn_inst1_allocs, cn_inst2_allocs = (
self._set_up_provider_tree_allocs())
# And now we should get all the right allocations. Note that we see
# nothing from othercn_inst.
@@ -1157,3 +1174,47 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
if 'generation' in alloc:
del alloc['generation']
self.assertEqual(expected, actual)
def test_reshape(self):
"""Smoke test the report client shim for the reshaper API."""
with self._interceptor():
# Simulate placement API communication failure
with mock.patch.object(
self.client, 'post', side_effect=kse.MissingAuthPlugin):
self.assertRaises(kse.ClientException,
self.client._reshape, self.context, {}, {})
# Invalid payload (empty inventories) results in a 409, which the
# report client converts to ReshapeFailed
try:
self.client._reshape(self.context, {}, {})
except exception.ReshapeFailed as e:
self.assertIn('JSON does not validate: {} does not have '
'enough properties', e.kwargs['error'])
# Okay, do some real stuffs. We're just smoke-testing that we can
# hit a good path to the API here; real testing of the API happens
# in gabbits and via update_from_provider_tree.
self._set_up_provider_tree()
self._set_up_provider_tree_allocs()
ptree = self.client.get_provider_tree_and_ensure_root(
self.context, self.compute_uuid)
inventories = {}
for rp_uuid in ptree.get_provider_uuids():
data = ptree.data(rp_uuid)
# Add a new resource class to the inventories
inventories[rp_uuid] = {
"inventories": dict(data.inventory,
CUSTOM_FOO={'total': 10}),
"resource_provider_generation": data.generation
}
allocs = self.client.get_allocations_for_provider_tree(
self.context, self.compute_name)
for alloc in allocs.values():
for res in alloc['allocations'].values():
res['resources']['CUSTOM_FOO'] = 1
resp = self.client._reshape(self.context, inventories, allocs)
self.assertEqual(204, resp.status_code)