diff --git a/doc/source/cli/nova-status.rst b/doc/source/cli/nova-status.rst
index 33a05c21aab1..2a62dfbd56c3 100644
--- a/doc/source/cli/nova-status.rst
+++ b/doc/source/cli/nova-status.rst
@@ -118,6 +118,10 @@ Upgrade
   * Checks that existing instances have been migrated to have a matching
     request spec in the API DB.

+  **19.0.0 (Stein)**
+
+  * Checks for the Placement API are modified to require version 1.30.
+
 See Also
 ========

diff --git a/doc/source/user/placement.rst b/doc/source/user/placement.rst
index bc714a622760..f22421230f90 100644
--- a/doc/source/user/placement.rst
+++ b/doc/source/user/placement.rst
@@ -57,11 +57,13 @@ changed or be partially complete at this time.
 * `Request Traits During Scheduling`_
 * `filter allocation candidates by aggregate membership`_
 * `perform granular allocation candidate requests`_
+* `inventory and allocation data migration`_ (reshaping provider trees)

 .. _Nested Resource Providers: http://specs.openstack.org/openstack/nova-specs/specs/queens/approved/nested-resource-providers.html
 .. _Request Traits During Scheduling: https://specs.openstack.org/openstack/nova-specs/specs/queens/approved/request-traits-in-nova.html
 .. _filter allocation candidates by aggregate membership: https://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/alloc-candidates-member-of.html
 .. _perform granular allocation candidate requests: http://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/granular-resource-requests.html
+.. _inventory and allocation data migration: http://specs.openstack.org/openstack/nova-specs/specs/rocky/approved/reshape-provider-tree.html

 Deployment
 ==========
diff --git a/nova/cmd/status.py b/nova/cmd/status.py
index cdea1f2d7b7e..954bcdcceb32 100644
--- a/nova/cmd/status.py
+++ b/nova/cmd/status.py
@@ -52,12 +52,11 @@ CONF = nova.conf.CONF
 PLACEMENT_DOCS_LINK = 'https://docs.openstack.org/nova/latest' \
                       '/user/placement.html'

-# NOTE(efried): 1.28 is required by "nova-manage placement heal_allocations"
-# to get the consumer generation when updating incomplete allocations with
-# instance consumer project_id and user_id values.
+# NOTE(efried): 1.30 is required by nova-compute to support resource provider
+# reshaping (inventory and allocation data migration).
 # NOTE: If you bump this version, remember to update the history
 # section in the nova-status man page (doc/source/cli/nova-status).
-MIN_PLACEMENT_MICROVERSION = "1.28"
+MIN_PLACEMENT_MICROVERSION = "1.30"


 class UpgradeCheckCode(enum.IntEnum):
diff --git a/nova/exception.py b/nova/exception.py
index 9b2f96a5d74d..e26210021078 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -2378,3 +2378,8 @@ class ResourceProviderAllocationRetrievalFailed(NovaException):
 class ConsumerAllocationRetrievalFailed(NovaException):
     msg_fmt = _("Failed to retrieve allocations for consumer "
                 "%(consumer_uuid)s: %(error)s")
+
+
+class ReshapeFailed(NovaException):
+    msg_fmt = _("Resource provider inventory and allocation data migration "
+                "failed: %(error)s")
diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py
index b741d5d0c542..656ceb9bb831 100644
--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -47,6 +47,7 @@ _RE_INV_IN_USE = re.compile("Inventory for (.+) on resource provider "
                             "(.+) in use")
 WARN_EVERY = 10
 PLACEMENT_CLIENT_SEMAPHORE = 'placement_client'
+RESHAPER_VERSION = '1.30'
 CONSUMER_GENERATION_VERSION = '1.28'
 GRANULAR_AC_VERSION = '1.25'
 ALLOW_RESERVED_EQUAL_TOTAL_INVENTORY_VERSION = '1.26'
@@ -1420,6 +1421,40 @@ class SchedulerReportClient(object):
         # when we invoke the DELETE. See bug #1746374.
         self._update_inventory(context, compute_node.uuid, inv_data)

+    def _reshape(self, context, inventories, allocations):
+        """Perform atomic inventory & allocation data migration.
+
+        :param context: The security context
+        :param inventories: A dict, keyed by resource provider UUID, of:
+                { "inventories": { inventory dicts, keyed by resource class },
+                  "resource_provider_generation": $RP_GEN }
+        :param allocations: A dict, keyed by consumer UUID, of:
+                { "project_id": $PROJ_ID,
+                  "user_id": $USER_ID,
+                  "consumer_generation": $CONSUMER_GEN,
+                  "allocations": {
+                      $RP_UUID: {
+                          "resources": { $RC: $AMOUNT, ... }
+                      },
+                      ...
+                  }
+                }
+        :return: The Response object representing a successful API call.
+        :raises: ReshapeFailed if the POST /reshaper request fails.
+        :raises: keystoneauth1.exceptions.ClientException if placement API
+                 communication fails.
+        """
+        # We have to make sure any new resource classes exist
+        for invs in inventories.values():
+            self._ensure_resource_classes(context, list(invs['inventories']))
+        payload = {"inventories": inventories, "allocations": allocations}
+        resp = self.post('/reshaper', payload, version=RESHAPER_VERSION,
+                         global_request_id=context.global_id)
+        if not resp:
+            raise exception.ReshapeFailed(error=resp.text)
+
+        return resp
+
     def update_from_provider_tree(self, context, new_tree):
         """Flush changes from a specified ProviderTree back to placement.
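
The _reshape docstring above describes the payload shapes only abstractly. As a rough illustration (not part of the change itself; the UUIDs, generation values, amounts, and the report_client name are hypothetical), a request that moves VCPU inventory from a compute node's root provider to an existing child provider, while rewriting one instance's allocations to match, could be assembled like this:

    # Illustrative sketch only; the dict shapes follow the _reshape()
    # docstring above.
    inventories = {
        # The root provider keeps its MEMORY_MB inventory...
        'cn-uuid': {
            'inventories': {'MEMORY_MB': {'total': 2048}},
            'resource_provider_generation': 5,
        },
        # ...while VCPU now lives on an existing child provider.
        'child-uuid': {
            'inventories': {'VCPU': {'total': 8}},
            'resource_provider_generation': 2,
        },
    }
    allocations = {
        # Keyed by consumer (instance) UUID; the listed consumers'
        # allocations are replaced as a single atomic operation.
        'instance-uuid': {
            'project_id': 'project-uuid',
            'user_id': 'user-uuid',
            'consumer_generation': 1,
            'allocations': {
                'cn-uuid': {'resources': {'MEMORY_MB': 512}},
                'child-uuid': {'resources': {'VCPU': 2}},
            },
        },
    }
    report_client._reshape(context, inventories, allocations)
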
diff --git a/nova/tests/functional/test_report_client.py b/nova/tests/functional/test_report_client.py
index bfb30b5e21d4..206e80e01e63 100644
--- a/nova/tests/functional/test_report_client.py
+++ b/nova/tests/functional/test_report_client.py
@@ -11,6 +11,7 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.

+from keystoneauth1 import exceptions as kse
 import mock
 import pkg_resources

@@ -1033,6 +1034,112 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
             self.client.get_allocation_candidates(
                 self.context, utils.ResourceRequest())

+    def _set_up_provider_tree(self):
+        """Create two compute nodes in placement: "this" one, and another one.
+
+        Must be invoked from within an _interceptor() context.
+        """
+        # get_provider_tree_and_ensure_root creates a resource provider
+        # record for us
+        ptree = self.client.get_provider_tree_and_ensure_root(
+            self.context, self.compute_uuid, name=self.compute_name)
+        ptree.update_inventory(self.compute_uuid,
+                               {'MEMORY_MB': {'total': 2048}})
+        ptree.update_aggregates(self.compute_uuid, [uuids.agg1])
+
+        # These are part of the compute node's tree
+        ptree.new_child('numa1', self.compute_uuid, uuid=uuids.numa1)
+        ptree.update_inventory('numa1', {'VCPU': {'total': 8},
+                                         'CUSTOM_PCPU': {'total': 8}})
+        ptree.new_child('numa2', self.compute_uuid, uuid=uuids.numa2)
+        ptree.update_inventory('numa2', {'VCPU': {'total': 8},
+                                         'CUSTOM_PCPU': {'total': 8}})
+
+        # A sharing provider that's not part of the compute node's tree.
+        # We avoid the report client's convenience methods to get bonus
+        # coverage of the subsequent update_from_provider_tree pulling it
+        # into the cache for us.
+        resp = self.client.post(
+            '/resource_providers',
+            {'uuid': uuids.ssp, 'name': 'ssp'}, version='1.20')
+        resp = self.client.put(
+            '/resource_providers/%s/inventories' % uuids.ssp,
+            {'inventories': {'DISK_GB': {'total': 500}},
+             'resource_provider_generation': resp.json()['generation']})
+        # Part of the shared storage aggregate
+        resp = self.client.put(
+            '/resource_providers/%s/aggregates' % uuids.ssp,
+            {'aggregates': [uuids.agg1],
+             'resource_provider_generation':
+                 resp.json()['resource_provider_generation']},
+            version='1.19')
+        self.client.put(
+            '/resource_providers/%s/traits' % uuids.ssp,
+            {'traits': ['MISC_SHARES_VIA_AGGREGATE'],
+             'resource_provider_generation':
+                 resp.json()['resource_provider_generation']})
+
+        self.client.update_from_provider_tree(self.context, ptree)
+
+        # Another unrelated compute node. We don't use the report client's
+        # convenience methods because we don't want this guy in the cache.
+        resp = self.client.post(
+            '/resource_providers',
+            {'uuid': uuids.othercn, 'name': 'othercn'}, version='1.20')
+        resp = self.client.put(
+            '/resource_providers/%s/inventories' % uuids.othercn,
+            {'inventories': {'VCPU': {'total': 8},
+                             'MEMORY_MB': {'total': 1024}},
+             'resource_provider_generation': resp.json()['generation']})
+        # Part of the shared storage aggregate
+        self.client.put(
+            '/resource_providers/%s/aggregates' % uuids.othercn,
+            {'aggregates': [uuids.agg1],
+             'resource_provider_generation':
+                 resp.json()['resource_provider_generation']},
+            version='1.19')
+
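
For orientation, the topology that _set_up_provider_tree() leaves behind can be summarized as follows (an illustrative literal only, not part of the change; names and values follow the fixtures above, with the unrelated compute node joined to the same shared storage aggregate as the main one):

    # Summary of the providers created by _set_up_provider_tree().
    provider_summary = {
        'compute node (self.compute_uuid)': {
            'inventory': {'MEMORY_MB': 2048},
            'aggregates': ['agg1'],
            'children': {
                'numa1': {'VCPU': 8, 'CUSTOM_PCPU': 8},
                'numa2': {'VCPU': 8, 'CUSTOM_PCPU': 8},
            },
        },
        'ssp': {
            'inventory': {'DISK_GB': 500},
            'aggregates': ['agg1'],
            'traits': ['MISC_SHARES_VIA_AGGREGATE'],
        },
        'othercn': {
            'inventory': {'VCPU': 8, 'MEMORY_MB': 1024},
            'aggregates': ['agg1'],  # shares the storage pool as well
        },
    }
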
+    def _set_up_provider_tree_allocs(self):
+        """Create some allocations on our compute (with sharing).
+
+        Must be invoked from within an _interceptor() context.
+        """
+        cn_inst1_allocs = {
+            'allocations': {
+                self.compute_uuid: {'resources': {'MEMORY_MB': 512}},
+                uuids.numa1: {'resources': {'VCPU': 2, 'CUSTOM_PCPU': 2}},
+                uuids.ssp: {'resources': {'DISK_GB': 100}}
+            },
+            'consumer_generation': None,
+            'project_id': uuids.proj,
+            'user_id': uuids.user,
+        }
+        self.client.put('/allocations/' + uuids.cn_inst1, cn_inst1_allocs)
+        cn_inst2_allocs = {
+            'allocations': {
+                self.compute_uuid: {'resources': {'MEMORY_MB': 256}},
+                uuids.numa2: {'resources': {'CUSTOM_PCPU': 1}},
+                uuids.ssp: {'resources': {'DISK_GB': 50}}
+            },
+            'consumer_generation': None,
+            'project_id': uuids.proj,
+            'user_id': uuids.user,
+        }
+        self.client.put('/allocations/' + uuids.cn_inst2, cn_inst2_allocs)
+        # And on the other compute (with sharing)
+        self.client.put(
+            '/allocations/' + uuids.othercn_inst,
+            {'allocations': {
+                uuids.othercn: {'resources': {'VCPU': 2, 'MEMORY_MB': 64}},
+                uuids.ssp: {'resources': {'DISK_GB': 30}}
+             },
+             'consumer_generation': None,
+             'project_id': uuids.proj,
+             'user_id': uuids.user,
+            })
+
+        return cn_inst1_allocs, cn_inst2_allocs
+
     def test_get_allocations_for_provider_tree(self):
         with self._interceptor():
             # When the provider tree cache is empty (or we otherwise supply a
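
To make the refactored test below easier to follow: judging from the allocations created above and from how the result is consumed in test_reshape further down, get_allocations_for_provider_tree() returns a dict keyed by consumer UUID, and othercn_inst does not appear because none of its allocations touch the compute node's tree. A rough sketch (illustrative only, not part of the change):

    # Approximate result of
    #   client.get_allocations_for_provider_tree(ctx, compute_name)
    # after the two helpers above have run.
    expected_consumers = {uuids.cn_inst1, uuids.cn_inst2}  # no othercn_inst
    # Each value carries that consumer's allocations, roughly:
    #   {'allocations': {rp_uuid: {'resources': {rc: amount, ...}}, ...},
    #    'project_id': uuids.proj, 'user_id': uuids.user, ...}
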
@@ -1041,104 +1148,14 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
                 self.client.get_allocations_for_provider_tree,
                 self.context, 'bogus')

-            # get_provider_tree_and_ensure_root creates a resource provider
-            # record for us
-            ptree = self.client.get_provider_tree_and_ensure_root(
-                self.context, self.compute_uuid, name=self.compute_name)
-            ptree.update_inventory(self.compute_uuid,
-                                   {'MEMORY_MB': {'total': 2048}})
-            ptree.update_aggregates(self.compute_uuid, [uuids.agg1])
-
-            # These are part of the compute node's tree
-            ptree.new_child('numa1', self.compute_uuid, uuid=uuids.numa1)
-            ptree.update_inventory('numa1', {'VCPU': {'total': 8},
-                                             'CUSTOM_PCPU': {'total': 8}})
-            ptree.new_child('numa2', self.compute_uuid, uuid=uuids.numa2)
-            ptree.update_inventory('numa2', {'VCPU': {'total': 8},
-                                             'CUSTOM_PCPU': {'total': 8}})
-
-            # A sharing provider that's not part of the compute node's tree.
-            # We avoid the report client's convenience methods to get bonus
-            # coverage of the subsequent update_from_provider_tree pulling it
-            # into the cache for us.
-            resp = self.client.post(
-                '/resource_providers',
-                {'uuid': uuids.ssp, 'name': 'ssp'}, version='1.20')
-            resp = self.client.put(
-                '/resource_providers/%s/inventories' % uuids.ssp,
-                {'inventories': {'DISK_GB': {'total': 500}},
-                 'resource_provider_generation': resp.json()['generation']})
-            # Part of the shared storage aggregate
-            resp = self.client.put(
-                '/resource_providers/%s/aggregates' % uuids.ssp,
-                {'aggregates': [uuids.agg1],
-                 'resource_provider_generation':
-                     resp.json()['resource_provider_generation']},
-                version='1.19')
-            self.client.put(
-                '/resource_providers/%s/traits' % uuids.ssp,
-                {'traits': ['MISC_SHARES_VIA_AGGREGATE'],
-                 'resource_provider_generation':
-                     resp.json()['resource_provider_generation']})
-
-            self.client.update_from_provider_tree(self.context, ptree)
-
-            # Another unrelated compute node. We don't use the report client's
-            # convenience methods because we don't want this guy in the cache.
-            resp = self.client.post(
-                '/resource_providers',
-                {'uuid': uuids.othercn, 'name': 'othercn'}, version='1.20')
-            resp = self.client.put(
-                '/resource_providers/%s/inventories' % uuids.othercn,
-                {'inventories': {'VCPU': {'total': 8},
-                                 'MEMORY_MB': {'total': 1024}},
-                 'resource_provider_generation': resp.json()['generation']})
-            # Part of the shared storage aggregate
-            self.client.put(
-                '/resource_providers/%s/aggregates' % uuids.othercn,
-                {'aggregates': [uuids.agg1],
-                 'resource_provider_generation':
-                     resp.json()['resource_provider_generation']},
-                version='1.19')
+            self._set_up_provider_tree()

             # At this point, there are no allocations
             self.assertEqual({}, self.client.get_allocations_for_provider_tree(
                 self.context, self.compute_name))

-            # Create some allocations on our compute (with sharing)
-            cn_inst1_allocs = {
-                'allocations': {
-                    self.compute_uuid: {'resources': {'MEMORY_MB': 512}},
-                    uuids.numa1: {'resources': {'VCPU': 2, 'CUSTOM_PCPU': 2}},
-                    uuids.ssp: {'resources': {'DISK_GB': 100}}
-                },
-                'consumer_generation': None,
-                'project_id': uuids.proj,
-                'user_id': uuids.user,
-            }
-            self.client.put('/allocations/' + uuids.cn_inst1, cn_inst1_allocs)
-            cn_inst2_allocs = {
-                'allocations': {
-                    self.compute_uuid: {'resources': {'MEMORY_MB': 256}},
-                    uuids.numa2: {'resources': {'CUSTOM_PCPU': 1}},
-                    uuids.ssp: {'resources': {'DISK_GB': 50}}
-                },
-                'consumer_generation': None,
-                'project_id': uuids.proj,
-                'user_id': uuids.user,
-            }
-            self.client.put('/allocations/' + uuids.cn_inst2, cn_inst2_allocs)
-            # And on the other compute (with sharing)
-            self.client.put(
-                '/allocations/' + uuids.othercn_inst,
-                {'allocations': {
-                    uuids.othercn: {'resources': {'VCPU': 2, 'MEMORY_MB': 64}},
-                    uuids.ssp: {'resources': {'DISK_GB': 30}}
-                 },
-                 'consumer_generation': None,
-                 'project_id': uuids.proj,
-                 'user_id': uuids.user,
-                })
+            cn_inst1_allocs, cn_inst2_allocs = (
+                self._set_up_provider_tree_allocs())

             # And now we should get all the right allocations. Note that we see
             # nothing from othercn_inst.
@@ -1157,3 +1174,47 @@
                 if 'generation' in alloc:
                     del alloc['generation']
             self.assertEqual(expected, actual)
+
+    def test_reshape(self):
+        """Smoke test the report client shim for the reshaper API."""
+        with self._interceptor():
+            # Simulate placement API communication failure
+            with mock.patch.object(
+                    self.client, 'post', side_effect=kse.MissingAuthPlugin):
+                self.assertRaises(kse.ClientException,
+                                  self.client._reshape, self.context, {}, {})
+
+            # Invalid payload (empty inventories) results in a 409, which the
+            # report client converts to ReshapeFailed
+            try:
+                self.client._reshape(self.context, {}, {})
+            except exception.ReshapeFailed as e:
+                self.assertIn('JSON does not validate: {} does not have '
+                              'enough properties', e.kwargs['error'])
+
+            # Okay, do some real stuffs. We're just smoke-testing that we can
+            # hit a good path to the API here; real testing of the API happens
+            # in gabbits and via update_from_provider_tree.
+            self._set_up_provider_tree()
+            self._set_up_provider_tree_allocs()
+
+            ptree = self.client.get_provider_tree_and_ensure_root(
+                self.context, self.compute_uuid)
+            inventories = {}
+            for rp_uuid in ptree.get_provider_uuids():
+                data = ptree.data(rp_uuid)
+                # Add a new resource class to the inventories
+                inventories[rp_uuid] = {
+                    "inventories": dict(data.inventory,
+                                        CUSTOM_FOO={'total': 10}),
+                    "resource_provider_generation": data.generation
+                }
+
+            allocs = self.client.get_allocations_for_provider_tree(
+                self.context, self.compute_name)
+            for alloc in allocs.values():
+                for res in alloc['allocations'].values():
+                    res['resources']['CUSTOM_FOO'] = 1
+
+            resp = self.client._reshape(self.context, inventories, allocs)
+            self.assertEqual(204, resp.status_code)
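
Per the _reshape() docstring, callers see two distinct failure modes: the keystoneauth ClientException when placement cannot be reached at all, and ReshapeFailed when placement answers but rejects the migration. A hypothetical caller-side sketch (report_client, context, inventories and allocations are stand-in names; the handling shown is illustrative, not part of this change):

    from keystoneauth1 import exceptions as ks_exc

    from nova import exception

    try:
        report_client._reshape(context, inventories, allocations)
    except ks_exc.ClientException:
        # Placement could not be reached (auth or connectivity problem);
        # the request may never have been processed.
        raise
    except exception.ReshapeFailed:
        # Placement rejected the migration (for example a schema
        # validation error or a generation conflict); the payload has to
        # be rebuilt before trying again.
        raise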