diff --git a/doc/source/cli/nova-manage.rst b/doc/source/cli/nova-manage.rst index 2a4320598775..a13289b2e816 100644 --- a/doc/source/cli/nova-manage.rst +++ b/doc/source/cli/nova-manage.rst @@ -1224,6 +1224,13 @@ This command requires that the Added :option:`--force` option. +.. versionchanged:: 25.0.0 (Yoga) + + Added support for healing port allocations if port-resource-request-groups + neutron API extension is enabled and therefore ports can request multiple + group of resources e.g. by using both guaranteed minimum bandwidth and + guaranteed minimum packet rate QoS policy rules. + .. rubric:: Options .. option:: --max-count diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index fcd3eb0c8e30..ee81d5f13b3c 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -28,6 +28,7 @@ import re import sys import time import traceback +import typing as ty from urllib import parse as urlparse from dateutil import parser as dateutil_parser @@ -1458,48 +1459,13 @@ class PlacementCommands(object): instance_uuid=instance.uuid, error=str(e)) @staticmethod - def _has_request_but_no_allocation(port): - request = port.get(constants.RESOURCE_REQUEST) + def _has_request_but_no_allocation(port, neutron): + has_res_req = neutron_api.API()._has_resource_request( + context.get_admin_context(), port, neutron) + binding_profile = neutron_api.get_binding_profile(port) allocation = binding_profile.get(constants.ALLOCATION) - # We are defensive here about 'resources' and 'required' in the - # 'resource_request' as neutron API is not clear about those fields - # being optional. - return (request and request.get('resources') and - request.get('required') and - not allocation) - - @staticmethod - def _get_rps_in_tree_with_required_traits( - ctxt, rp_uuid, required_traits, placement): - """Find the RPs that have all the required traits in the given rp tree. - - :param ctxt: nova.context.RequestContext - :param rp_uuid: the RP uuid that will be used to query the tree. - :param required_traits: the traits that need to be supported by - the returned resource providers. - :param placement: nova.scheduler.client.report.SchedulerReportClient - to communicate with the Placement service API. - :raise PlacementAPIConnectFailure: if placement API cannot be reached - :raise ResourceProviderRetrievalFailed: if the resource provider does - not exist. - :raise ResourceProviderTraitRetrievalFailed: if resource provider - trait information cannot be read from placement. - :return: A list of RP UUIDs that supports every required traits and - in the tree for the provider rp_uuid. - """ - try: - rps = placement.get_providers_in_tree(ctxt, rp_uuid) - matching_rps = [ - rp['uuid'] - for rp in rps - if set(required_traits).issubset( - placement.get_provider_traits(ctxt, rp['uuid']).traits) - ] - except ks_exc.ClientException: - raise exception.PlacementAPIConnectFailure() - - return matching_rps + return has_res_req and not allocation @staticmethod def _merge_allocations(alloc1, alloc2): @@ -1525,67 +1491,105 @@ class PlacementCommands(object): allocations[rp_uuid]['resources'][rc] += amount return allocations - def _get_port_allocation( - self, ctxt, node_uuid, port, instance_uuid, placement): - """Return the extra allocation the instance needs due to the given - port. 
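# Illustrative sketch, not part of the patch: the two Neutron port
# resource-request formats that the reworked _has_request_but_no_allocation()
# has to cope with, plus a plain-dict stand-in for the check it now delegates
# to nova.network.neutron.API._has_resource_request(). The port dicts and the
# helper below are simplified examples, not Nova code.

LEGACY_PORT = {
    'id': 'port-1',
    'resource_request': {            # one implicit group per port
        'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
        'required': ['CUSTOM_PHYSNET2', 'CUSTOM_VNIC_TYPE_NORMAL'],
    },
    'binding:profile': {},           # no 'allocation' key yet -> needs healing
}

EXTENDED_PORT = {
    'id': 'port-2',
    'resource_request': {            # port-resource-request-groups extension
        'request_groups': [{
            'id': 'group-uuid-1',
            'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
            'required': ['CUSTOM_PHYSNET2', 'CUSTOM_VNIC_TYPE_NORMAL'],
        }],
        'same_subtree': ['group-uuid-1'],
    },
    'binding:profile': {},
}


def needs_healing(port):
    # Simplified: a port needs healing when it carries any resource request
    # but its binding profile does not record an allocation yet.
    has_request = bool(port.get('resource_request'))
    allocation = (port.get('binding:profile') or {}).get('allocation')
    return has_request and not allocation


assert needs_healing(LEGACY_PORT)
assert needs_healing(EXTENDED_PORT)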
+ @staticmethod + def _get_resource_request_from_ports( + ctxt: context.RequestContext, + ports: ty.List[ty.Dict[str, ty.Any]] + ) -> ty.Tuple[ + ty.Dict[str, ty.List['objects.RequestGroup']], + 'objects.RequestLevelParams']: + """Collect RequestGroups and RequestLevelParams for all ports - :param ctxt: nova.context.RequestContext - :param node_uuid: the ComputeNode uuid the instance is running on. - :param port: the port dict returned from neutron - :param instance_uuid: The uuid of the instance the port is bound to - :param placement: nova.scheduler.client.report.SchedulerReportClient - to communicate with the Placement service API. - :raise PlacementAPIConnectFailure: if placement API cannot be reached - :raise ResourceProviderRetrievalFailed: compute node resource provider - does not exist. - :raise ResourceProviderTraitRetrievalFailed: if resource provider - trait information cannot be read from placement. - :raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided - unambiguously which resource provider to heal from. - :raise NoResourceProviderToHealFrom: if there is no resource provider - found to heal from. - :return: A dict of resources keyed by RP uuid to be included in the - instance allocation dict. + :param ctxt: the request context + :param ports: a list of port dicts + :returns: A two tuple where the first item is a dict mapping port + uuids to a list of request groups coming from that port, the + second item is a combined RequestLevelParams object from all ports. """ - matching_rp_uuids = self._get_rps_in_tree_with_required_traits( - ctxt, node_uuid, port[constants.RESOURCE_REQUEST]['required'], - placement) + groups = {} + request_level_params = objects.RequestLevelParams() + extended_res_req = ( + neutron_api.API().has_extended_resource_request_extension( + ctxt) + ) - if len(matching_rp_uuids) > 1: - # If there is more than one such RP then it is an ambiguous - # situation that we cannot handle here efficiently because that - # would require the reimplementation of most of the allocation - # candidate query functionality of placement. Also if more - # than one such RP exists then selecting the right one might - # need extra information from the compute node. For example - # which PCI PF the VF is allocated from and which RP represents - # that PCI PF in placement. When migration is supported with such - # servers then we can ask the admin to migrate these servers - # instead to heal their allocation. 
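# Illustrative sketch, not part of the patch: how the reworked healing logic
# further down in this change interprets the result of the allocation
# candidates query. Per the patch, the report client's
# get_allocation_candidates() returns None when Placement cannot be reached
# and a three-tuple otherwise, and each candidate carries 'allocations' and
# 'mappings'. The function and error handling below are simplified stand-ins.

def pick_single_candidate(result, port_ids):
    if result is None:
        raise RuntimeError('Placement API could not be reached')
    alloc_reqs, _summaries, _version = result
    if not alloc_reqs:
        raise RuntimeError(
            'no allocation candidate for port(s) %s' % port_ids)
    if len(alloc_reqs) > 1:
        # Ambiguous: choosing one would need compute-side information (e.g.
        # which PCI PF a VF was allocated from), so the tool refuses to guess
        # and suggests migrating the instance instead.
        raise RuntimeError(
            'more than one candidate for port(s) %s' % port_ids)
    candidate = alloc_reqs[0]
    return candidate['allocations'], candidate['mappings']


# usage with a single, unambiguous candidate
fake_result = (
    [{
        'allocations': {
            'rp-uuid': {'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000}},
        },
        'mappings': {'port-1': ['rp-uuid']},
    }],
    {},        # provider summaries (unused here)
    '1.36',    # microversion placeholder
)
allocations, mappings = pick_single_candidate(fake_result, ['port-1'])
assert 'rp-uuid' in allocations and mappings['port-1'] == ['rp-uuid']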
- raise exception.MoreThanOneResourceProviderToHealFrom( - rp_uuids=','.join(matching_rp_uuids), - port_id=port['id'], - instance_uuid=instance_uuid) + for port in ports: + resource_request = port.get(constants.RESOURCE_REQUEST) + if extended_res_req: + groups[port['id']] = ( + objects.RequestGroup.from_extended_port_request( + ctxt, resource_request + ) + ) + request_level_params.extend_with( + objects.RequestLevelParams.from_port_request( + resource_request + ) + ) + else: + # This is the legacy format, only one group per port and no + # request level param support + # TODO(gibi): remove this path once the extended resource + # request extension is mandatory in neutron + groups[port['id']] = [ + objects.RequestGroup.from_port_request( + ctxt, port['id'], resource_request + ) + ] - if len(matching_rp_uuids) == 0: - raise exception.NoResourceProviderToHealFrom( - port_id=port['id'], - instance_uuid=instance_uuid, - traits=port[constants.RESOURCE_REQUEST]['required'], - node_uuid=node_uuid) + return groups, request_level_params - # We found one RP that matches the traits. Assume that we can allocate - # the resources from it. If there is not enough inventory left on the - # RP then the PUT /allocations placement call will detect that. - rp_uuid = matching_rp_uuids[0] + @staticmethod + def _get_port_binding_profile_allocation( + ctxt: context.RequestContext, + neutron: neutron_api.ClientWrapper, + port: ty.Dict[str, ty.Any], + request_groups: ty.List['objects.RequestGroup'], + resource_provider_mapping: ty.Dict[str, ty.List[str]], + ) -> ty.Dict[str, str]: + """Generate the value of the allocation key of the port binding profile + based on the provider mapping returned from placement - port_allocation = { - rp_uuid: { - 'resources': port[constants.RESOURCE_REQUEST]['resources'] + :param ctxt: the request context + :param neutron: the neutron client + :param port: the port dict from neutron + :param request_groups: the list of RequestGroups object generated from + the port resource request + :param resource_provider_mapping: The dict of request group to resource + provider mapping returned by the Placement allocation candidate + query + :returns: a dict mapping request group ids to resource provider uuids + in the form as Neutron expects in the port binding profile. + """ + if neutron_api.API().has_extended_resource_request_extension( + ctxt, neutron + ): + # The extended resource request format also means that a + # port has more than a one request groups. + # Each request group id from the port needs to be mapped to + # a single provider id from the provider mappings. Each + # group from the port is mapped to a numbered request group + # in placement so we can assume that they are mapped to + # a single provider and therefore the provider mapping list + # has a single provider id. + allocation = { + group.requester_id: resource_provider_mapping[ + group.requester_id][0] + for group in request_groups } - } - return port_allocation + else: + # This is the legacy resource request format where a port + # is mapped to a single request group + # NOTE(gibi): In the resource provider mapping there can be + # more than one RP fulfilling a request group. But resource + # requests of a Neutron port is always mapped to a + # numbered request group that is always fulfilled by one + # resource provider. So we only pass that single RP UUID + # here. 
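# Illustrative sketch, not part of the patch: the two shapes the binding
# profile 'allocation' value takes, derived from an allocation candidate's
# provider 'mappings'. The UUID strings below are made-up examples; in the
# extended format the mapping is keyed by request group id, in the legacy
# format by the port id itself.

provider_mapping = {
    'group-uuid-1': ['rp-uuid-a'],   # extended: one entry per request group
    'group-uuid-2': ['rp-uuid-b'],
    'port-1': ['rp-uuid-a'],         # legacy: the single implicit group
}

# extended resource request -> dict of group id to resource provider uuid
extended_allocation = {
    group_id: provider_mapping[group_id][0]
    for group_id in ('group-uuid-1', 'group-uuid-2')
}
assert extended_allocation == {
    'group-uuid-1': 'rp-uuid-a',
    'group-uuid-2': 'rp-uuid-b',
}

# legacy resource request -> a single resource provider uuid string
legacy_allocation = provider_mapping['port-1'][0]
assert legacy_allocation == 'rp-uuid-a'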
+ allocation = resource_provider_mapping[ + port['id']][0] + + return allocation def _get_port_allocations_to_heal( self, ctxt, instance, node_cache, placement, neutron, output): @@ -1604,15 +1608,10 @@ class PlacementCommands(object): :raise nova.exception.ComputeHostNotFound: if compute node of the instance not found in the db. :raise PlacementAPIConnectFailure: if placement API cannot be reached - :raise ResourceProviderRetrievalFailed: if the resource provider - representing the compute node the instance is running on does not - exist. - :raise ResourceProviderTraitRetrievalFailed: if resource provider - trait information cannot be read from placement. - :raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided - unambiguously which resource provider to heal from. - :raise NoResourceProviderToHealFrom: if there is no resource provider - found to heal from. + :raise AllocationUpdateFailed: if there is either no allocation + candidate returned from placement for the missing port allocations + or there are more than one candidates making the healing + ambiguous. :return: A two tuple where the first item is a dict of resources keyed by RP uuid to be included in the instance allocation dict. The second item is a list of port dicts to be updated in Neutron. @@ -1629,7 +1628,7 @@ class PlacementCommands(object): # are not on any host. ports_to_heal = [ port for port in self._get_ports(ctxt, instance, neutron) - if self._has_request_but_no_allocation(port)] + if self._has_request_but_no_allocation(port, neutron)] if not ports_to_heal: # nothing to do, return early @@ -1638,26 +1637,107 @@ class PlacementCommands(object): node_uuid = self._get_compute_node_uuid( ctxt, instance, node_cache) - allocations = {} + # NOTE(gibi): We need to handle both legacy and extended resource + # request. So we need to handle ports with multiple request groups + # allocating from multiple providers. + # The logic what we follow here is pretty similar to the logic + # implemented in ComputeManager._allocate_port_resource_for_instance + # for the interface attach case. We just apply it to more then one + # ports here. + request_groups_per_port, req_lvl_params = ( + self._get_resource_request_from_ports(ctxt, ports_to_heal) + ) + # flatten the list of list of groups + request_groups = [ + group + for groups in request_groups_per_port.values() + for group in groups + ] + + # we can have multiple request groups, it would be enough to restrict + # only one of them to the compute tree but for symmetry we restrict + # all of them + for request_group in request_groups: + request_group.in_tree = node_uuid + + # If there are multiple groups then the group_policy is mandatory in + # the allocation candidate query. We can assume that if this instance + # booted successfully then we have the policy in the flavor. If there + # is only one group and therefore no policy then the value of the + # policy in the allocation candidate query is ignored, so we simply + # default it here. + group_policy = instance.flavor.extra_specs.get("group_policy", "none") + + rr = scheduler_utils.ResourceRequest.from_request_groups( + request_groups, req_lvl_params, group_policy) + res = placement.get_allocation_candidates(ctxt, rr) + # NOTE(gibi): the get_allocation_candidates method has the + # @safe_connect decorator applied. Such decorator will return None + # if the connection to Placement is failed. So we raise an exception + # here. 
The case when Placement successfully return a response, even + # if it is a negative or empty response, the method will return a three + # tuple. That case is handled couple of lines below. + if not res: + raise exception.PlacementAPIConnectFailure() + alloc_reqs, __, __ = res + + if not alloc_reqs: + port_ids = [port['id'] for port in ports_to_heal] + raise exception.AllocationUpdateFailed( + consumer_uuid=instance.uuid, + error=f'Placement returned no allocation candidate to fulfill ' + f'the resource request of the port(s) {port_ids}' + ) + if len(alloc_reqs) > 1: + # If there is more than one candidates then it is an ambiguous + # situation that we cannot handle here because selecting the right + # one might need extra information from the compute node. For + # example which PCI PF the VF is allocated from and which RP + # represents that PCI PF in placement. + # TODO(gibi): One way to get that missing information to resolve + # ambiguity would be to load up the InstancePciRequest objects and + # try to use the parent_if_name in their spec to find the proper + # candidate that allocates for the same port from the PF RP that + # has the same name. + port_ids = [port['id'] for port in ports_to_heal] + raise exception.AllocationUpdateFailed( + consumer_uuid=instance.uuid, + error=f'Placement returned more than one possible allocation ' + f'candidates to fulfill the resource request of the ' + f'port(s) {port_ids}. This script does not have enough ' + f'information to select the proper candidate to heal the' + f'missing allocations. A possible way to heal the' + f'allocation of this instance is to migrate it to ' + f'another compute as the migration process re-creates ' + f'the full allocation on the target host.' + ) + + # so we have one candidate, lets use that to get the needed allocations + # and the provider mapping for the ports' binding profile + alloc_req = alloc_reqs[0] + allocations = alloc_req["allocations"] + provider_mappings = alloc_req["mappings"] + for port in ports_to_heal: - port_allocation = self._get_port_allocation( - ctxt, node_uuid, port, instance.uuid, placement) - rp_uuid = list(port_allocation)[0] - allocations = self._merge_allocations( - allocations, port_allocation) - # We also need to record the RP we are allocated from in the + # We also need to record the RPs we are allocated from in the # port. 
This will be sent back to Neutron before the allocation # is updated in placement + profile_allocation = self._get_port_binding_profile_allocation( + ctxt, neutron, port, request_groups_per_port[port['id']], + provider_mappings + ) binding_profile = neutron_api.get_binding_profile(port) - binding_profile[constants.ALLOCATION] = rp_uuid + binding_profile[constants.ALLOCATION] = profile_allocation port[constants.BINDING_PROFILE] = binding_profile - output(_("Found resource provider %(rp_uuid)s having matching " - "traits for port %(port_uuid)s with resource request " - "%(request)s attached to instance %(instance_uuid)s") % - {"rp_uuid": rp_uuid, "port_uuid": port["id"], - "request": port.get(constants.RESOURCE_REQUEST), - "instance_uuid": instance.uuid}) + output(_( + "Found a request group : resource provider mapping " + "%(mapping)s for the port %(port_uuid)s with resource request " + "%(request)s attached to the instance %(instance_uuid)s") % + {"mapping": profile_allocation, "port_uuid": port['id'], + "request": port.get(constants.RESOURCE_REQUEST), + "instance_uuid": instance.uuid} + ) return allocations, ports_to_heal @@ -1791,15 +1871,6 @@ class PlacementCommands(object): a given instance with consumer project/user information :raise UnableToQueryPorts: If the neutron list ports query fails. :raise PlacementAPIConnectFailure: if placement API cannot be reached - :raise ResourceProviderRetrievalFailed: if the resource provider - representing the compute node the instance is running on does not - exist. - :raise ResourceProviderTraitRetrievalFailed: if resource provider - trait information cannot be read from placement. - :raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided - unambiguously which resource provider to heal from. - :raise NoResourceProviderToHealFrom: if there is no resource provider - found to heal from. :raise UnableToUpdatePorts: if a port update failed in neutron but any partial update was rolled back successfully. :raise UnableToRollbackPortUpdates: if a port update failed in neutron @@ -1956,15 +2027,6 @@ class PlacementCommands(object): a given instance with consumer project/user information :raise UnableToQueryPorts: If the neutron list ports query fails. :raise PlacementAPIConnectFailure: if placement API cannot be reached - :raise ResourceProviderRetrievalFailed: if the resource provider - representing the compute node the instance is running on does not - exist. - :raise ResourceProviderTraitRetrievalFailed: if resource provider - trait information cannot be read from placement. - :raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided - unambiguously which resource provider to heal from. - :raise NoResourceProviderToHealFrom: if there is no resource provider - found to heal from. :raise UnableToUpdatePorts: if a port update failed in neutron but any partial update was rolled back successfully. 
:raise UnableToRollbackPortUpdates: if a port update failed in neutron @@ -2186,13 +2248,11 @@ class PlacementCommands(object): except exception.ComputeHostNotFound as e: print(e.format_message()) return 2 - except (exception.AllocationCreateFailed, - exception.AllocationUpdateFailed, - exception.NoResourceProviderToHealFrom, - exception.MoreThanOneResourceProviderToHealFrom, - exception.PlacementAPIConnectFailure, - exception.ResourceProviderRetrievalFailed, - exception.ResourceProviderTraitRetrievalFailed) as e: + except ( + exception.AllocationCreateFailed, + exception.AllocationUpdateFailed, + exception.PlacementAPIConnectFailure + ) as e: print(e.format_message()) return 3 except exception.UnableToQueryPorts as e: diff --git a/nova/exception.py b/nova/exception.py index 5ee06a737914..1884c3e90028 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2291,28 +2291,6 @@ class HealPortAllocationException(NovaException): msg_fmt = _("Healing port allocation failed.") -class MoreThanOneResourceProviderToHealFrom(HealPortAllocationException): - msg_fmt = _("More than one matching resource provider %(rp_uuids)s is " - "available for healing the port allocation for port " - "%(port_id)s for instance %(instance_uuid)s. This script " - "does not have enough information to select the proper " - "resource provider from which to heal.") - - -class NoResourceProviderToHealFrom(HealPortAllocationException): - msg_fmt = _("No matching resource provider is " - "available for healing the port allocation for port " - "%(port_id)s for instance %(instance_uuid)s. There are no " - "resource providers with matching traits %(traits)s in the " - "provider tree of the resource provider %(node_uuid)s ." - "This probably means that the neutron QoS configuration is " - "wrong. Consult with " - "https://docs.openstack.org/neutron/latest/admin/" - "config-qos-min-bw.html for information on how to configure " - "neutron. If the configuration is fixed the script can be run " - "again.") - - class UnableToQueryPorts(HealPortAllocationException): msg_fmt = _("Unable to query ports for instance %(instance_uuid)s: " "%(error)s") diff --git a/nova/tests/functional/test_nova_manage.py b/nova/tests/functional/test_nova_manage.py index 03cc2c90d5bf..79f88c38de47 100644 --- a/nova/tests/functional/test_nova_manage.py +++ b/nova/tests/functional/test_nova_manage.py @@ -1,4 +1,5 @@ # Licensed under the Apache License, Version 2.0 (the "License"); you may +# Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain # a copy of the License at # @@ -20,7 +21,7 @@ import mock from neutronclient.common import exceptions as neutron_client_exc import os_resource_classes as orc from oslo_serialization import jsonutils -from oslo_utils.fixture import uuidsentinel +from oslo_utils.fixture import uuidsentinel as uuids from oslo_utils import timeutils from nova.cmd import manage @@ -33,7 +34,7 @@ from nova import test from nova.tests import fixtures as nova_fixtures from nova.tests.functional import fixtures as func_fixtures from nova.tests.functional import integrated_helpers -from nova.tests.functional import test_servers_resource_request +from nova.tests.functional import test_servers_resource_request as test_res_req from nova import utils as nova_utils CONF = config.CONF @@ -1063,7 +1064,8 @@ class TestNovaManagePlacementHealAllocations( class TestNovaManagePlacementHealPortAllocations( - test_servers_resource_request.PortResourceRequestBasedSchedulingTestBase): + test_res_req.PortResourceRequestBasedSchedulingTestBase +): def setUp(self): super(TestNovaManagePlacementHealPortAllocations, self).setUp() @@ -1087,15 +1089,17 @@ class TestNovaManagePlacementHealPortAllocations( bound_port = self.neutron._ports[port_id] bound_port[constants.RESOURCE_REQUEST] = resource_request + @staticmethod + def _get_default_resource_request(port_uuid): + return { + "resources": { + orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000, + orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, + "required": ["CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"] + } + def _create_server_with_missing_port_alloc( self, ports, resource_request=None): - if not resource_request: - resource_request = { - "resources": { - orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000, - orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, - "required": ["CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"] - } server = self._create_server( flavor=self.flavor, @@ -1105,14 +1109,22 @@ class TestNovaManagePlacementHealPortAllocations( # This is a hack to simulate that we have a server that is missing # allocation for its port for port in ports: - self._add_resource_request_to_a_bound_port( - port['id'], resource_request) + if not resource_request: + rr = self._get_default_resource_request(port['id']) + else: + rr = resource_request + + self._add_resource_request_to_a_bound_port(port['id'], rr) updated_ports = [ self.neutron.show_port(port['id'])['port'] for port in ports] return server, updated_ports + def _get_resource_request(self, port): + res_req = port[constants.RESOURCE_REQUEST] + return res_req + def _assert_placement_updated(self, server, ports): rsp = self.placement.get( '/allocations/%s' % server['id'], @@ -1137,7 +1149,7 @@ class TestNovaManagePlacementHealPortAllocations( # bridge RP total_request = collections.defaultdict(int) for port in ports: - port_request = port[constants.RESOURCE_REQUEST]['resources'] + port_request = self._get_resource_request(port)['resources'] for rc, amount in port_request.items(): total_request[rc] += amount self.assertEqual(total_request, network_allocations) @@ -1362,83 +1374,64 @@ class TestNovaManagePlacementHealPortAllocations( 'Successfully created allocations for instance', output) self.assertEqual(0, result) - def test_heal_port_allocation_not_enough_resources_for_port(self): - """Test that a port needs allocation but not enough inventory - available. - """ + @staticmethod + def _get_too_big_resource_request(): # The port will request too much NET_BW_IGR_KILOBIT_PER_SEC so there is # no RP on the host that can provide it. 
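# Illustrative sketch, not part of the patch: why the resource requests in
# these tests were refactored into small static hook methods
# (_get_default_resource_request, _get_too_big_resource_request,
# _get_ambiguous_resource_request). Subclasses exercising the extended,
# multi-group format only override the hooks and inherit every test method
# unchanged. The toy classes below are hypothetical, not the real fixtures.

class LegacyRequestTests:
    @staticmethod
    def _get_default_resource_request(port_uuid):
        return {
            'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
            'required': ['CUSTOM_PHYSNET2', 'CUSTOM_VNIC_TYPE_NORMAL'],
        }

    def test_request_is_usable(self):
        # the real tests attach this to a bound port and run heal_allocations;
        # here we only check that the hook produces a request
        assert self._get_default_resource_request('port-1')


class MultiGroupRequestTests(LegacyRequestTests):
    # same test methods, different wire format
    @staticmethod
    def _get_default_resource_request(port_uuid):
        return {
            'request_groups': [{
                'id': 'group-uuid-1',
                'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
                'required': ['CUSTOM_VNIC_TYPE_NORMAL'],
            }],
            'same_subtree': ['group-uuid-1'],
        }


LegacyRequestTests().test_request_is_usable()
MultiGroupRequestTests().test_request_is_usable()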
- resource_request = { + return { "resources": { orc.NET_BW_IGR_KILOBIT_PER_SEC: 100000000000, orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, "required": ["CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"] } + + def test_heal_port_allocation_not_enough_resources_for_port(self): + """Test that a port needs allocation but not enough inventory + available. + """ server, ports = self._create_server_with_missing_port_alloc( - [self.neutron.port_1], resource_request) + [self.neutron.port_1], self._get_too_big_resource_request()) # let's trigger a heal result = self.cli.heal_allocations(verbose=True, max_count=2) self._assert_placement_not_updated(server) - # Actually the ports were updated but the update is rolled back when - # the placement update failed self._assert_ports_not_updated(ports) output = self.output.getvalue() self.assertIn( - 'Rolling back port update', - output) - self.assertIn( - 'Failed to update allocations for consumer', + 'Placement returned no allocation candidate', output) self.assertEqual(3, result) - def test_heal_port_allocation_no_rp_providing_required_traits(self): - """Test that a port needs allocation but no rp is providing the - required traits. - """ - # The port will request a trait, CUSTOM_PHYSNET_NONEXISTENT that will - # not be provided by any RP on this host - resource_request = { - "resources": { - orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000, - orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, - "required": ["CUSTOM_PHYSNET_NONEXISTENT", - "CUSTOM_VNIC_TYPE_NORMAL"] - } - server, ports = self._create_server_with_missing_port_alloc( - [self.neutron.port_1], resource_request) - - # let's trigger a heal - result = self.cli.heal_allocations(verbose=True, max_count=2) - - self._assert_placement_not_updated(server) - self._assert_ports_not_updated(ports) - - self.assertIn( - 'No matching resource provider is available for healing the port ' - 'allocation', - self.output.getvalue()) - self.assertEqual(3, result) - - def test_heal_port_allocation_ambiguous_rps(self): - """Test that there are more than one matching RPs are available on the - compute. - """ - - # The port will request CUSTOM_VNIC_TYPE_DIRECT trait and there are - # two RPs that supports such trait. - resource_request = { + @staticmethod + def _get_ambiguous_resource_request(): + return { "resources": { orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000, orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, "required": ["CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_DIRECT"] } + + def test_heal_port_allocation_ambiguous_candidates(self): + """Test that there are more than one matching set of RPs are available + on the compute. 
+ """ + # Add bandwidth inventory for PF3 so that both FP2 and FP3 could + # support the port's request making the situation ambiguous + inventories = { + orc.NET_BW_IGR_KILOBIT_PER_SEC: {"total": 100000}, + orc.NET_BW_EGR_KILOBIT_PER_SEC: {"total": 100000}, + } + self._set_provider_inventories( + self.sriov_dev_rp_per_host[self.compute1_rp_uuid][self.PF3], + {"inventories": inventories} + ) + server, ports = self._create_server_with_missing_port_alloc( - [self.neutron.port_1], resource_request) + [self.neutron.port_1], self._get_ambiguous_resource_request()) # let's trigger a heal result = self.cli.heal_allocations(verbose=True, max_count=2) @@ -1447,7 +1440,7 @@ class TestNovaManagePlacementHealPortAllocations( self._assert_ports_not_updated(ports) self.assertIn( - 'More than one matching resource provider', + ' Placement returned more than one possible allocation candidates', self.output.getvalue()) self.assertEqual(3, result) @@ -1595,12 +1588,18 @@ class TestNovaManagePlacementHealPortAllocations( output) self.assertEqual(7, result) - def _test_heal_port_allocation_placement_unavailable( - self, server, ports, error): + def test_heal_port_allocation_placement_unavailable_during_a_c(self): + server, ports = self._create_server_with_missing_port_alloc( + [self.neutron.port_1]) - with mock.patch('nova.cmd.manage.PlacementCommands.' - '_get_rps_in_tree_with_required_traits', - side_effect=error): + # Simulate that placement is unavailable during the allocation + # candidate query. The safe_connect decorator signals that via + # returning None. + with mock.patch( + 'nova.scheduler.client.report.SchedulerReportClient.' + 'get_allocation_candidates', + return_value=None + ): result = self.cli.heal_allocations(verbose=True, max_count=2) self._assert_placement_not_updated(server) @@ -1608,18 +1607,194 @@ class TestNovaManagePlacementHealPortAllocations( self.assertEqual(3, result) - def test_heal_port_allocation_placement_unavailable(self): + def test_heal_port_allocation_placement_unavailable_during_update(self): server, ports = self._create_server_with_missing_port_alloc( [self.neutron.port_1]) - for error in [ - exception.PlacementAPIConnectFailure(), - exception.ResourceProviderRetrievalFailed(uuid=uuidsentinel.rp1), - exception.ResourceProviderTraitRetrievalFailed( - uuid=uuidsentinel.rp1)]: + # Simulate that placement is unavailable during updating the + # allocation. The retry decorator signals that via returning False + with mock.patch( + 'nova.scheduler.client.report.SchedulerReportClient.' + 'put_allocations', + return_value=False + ): + result = self.cli.heal_allocations(verbose=True, max_count=2) - self._test_heal_port_allocation_placement_unavailable( - server, ports, error) + self._assert_placement_not_updated(server) + # Actually there was a port update but that was rolled back when + # the allocation update failed + self._assert_ports_not_updated(ports) + + output = self.output.getvalue() + self.assertIn( + 'Rolling back port update', + output) + self.assertEqual(3, result) + + +class TestNovaManagePlacementHealPortAllocationsExtended( + TestNovaManagePlacementHealPortAllocations +): + """Run the same tests as in TestNovaManagePlacementHealPortAllocations but + with the ExtendedResourceRequestNeutronFixture that automatically + translates the resource request in the ports used by these test to the + extended format. Note that this will test the extended format handling but + only with a single request group per port. 
+ """ + def setUp(self): + super().setUp() + self.neutron = self.useFixture( + test_res_req.ExtendedResourceRequestNeutronFixture(self)) + + def _get_resource_request(self, port): + # we assume a single resource request group in this test class + res_req = port[constants.RESOURCE_REQUEST][constants.REQUEST_GROUPS] + assert len(res_req) == 1 + return res_req[0] + + def _assert_port_updated(self, port_uuid): + updated_port = self.neutron.show_port(port_uuid)['port'] + binding_profile = updated_port.get('binding:profile', {}) + self.assertEqual( + {port_uuid: self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]}, + binding_profile['allocation']) + + +class TestNovaManagePlacementHealPortAllocationsMultiGroup( + TestNovaManagePlacementHealPortAllocations +): + """Run the same tests as in TestNovaManagePlacementHealPortAllocations but + with the MultiGroupResourceRequestNeutronFixture to test with extended + resource request with multiple groups. + """ + def setUp(self): + super().setUp() + self.neutron = self.useFixture( + test_res_req.MultiGroupResourceRequestNeutronFixture(self)) + + @staticmethod + def _get_default_resource_request(port_uuid): + # we need unique uuids per port for multi port tests + g1 = getattr(uuids, port_uuid + "group1") + g2 = getattr(uuids, port_uuid + "group2") + return { + "request_groups": [ + { + "id": g1, + "resources": { + orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000, + orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, + "required": [ + "CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL" + ] + }, + { + "id": g2, + "resources": { + orc.NET_PACKET_RATE_KILOPACKET_PER_SEC: 1000 + }, + "required": ["CUSTOM_VNIC_TYPE_NORMAL"] + } + ], + "same_subtree": [g1, g2], + } + + @staticmethod + def _get_too_big_resource_request(): + # The port will request too much NET_BW_IGR_KILOBIT_PER_SEC so there is + # no RP on the host that can provide it. + return { + "request_groups": [ + { + "id": uuids.g1, + "resources": { + orc.NET_BW_IGR_KILOBIT_PER_SEC: 10000000000000, + orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, + "required": [ + "CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL" + ] + }, + { + "id": uuids.g2, + "resources": { + orc.NET_PACKET_RATE_KILOPACKET_PER_SEC: 1000 + }, + "required": ["CUSTOM_VNIC_TYPE_NORMAL"] + } + ], + "same_subtree": [uuids.g1, uuids.g2], + } + + @staticmethod + def _get_ambiguous_resource_request(): + # ambiguity cannot really be simulated with multiple groups as that + # would require multiple OVS bridges instead of multiple PFs. So + # falling back to a single group in this specific test case. 
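# Illustrative sketch, not part of the patch: oslo's uuidsentinel hands out a
# lazily generated UUID that stays stable for a given attribute name, which is
# why the multi-group fixture above can build reproducible, per-port group ids
# with getattr(uuids, <port_uuid> + "group1"). The names used below are
# arbitrary examples.

from oslo_utils.fixture import uuidsentinel as uuids

port_uuid = 'port-1'
g1 = getattr(uuids, port_uuid + 'group1')
g2 = getattr(uuids, port_uuid + 'group2')

# the same name always resolves to the same sentinel UUID...
assert g1 == getattr(uuids, port_uuid + 'group1')
# ...and different names resolve to different UUIDs
assert g1 != g2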
+ return { + "request_groups": [ + { + "id": uuids.g1, + "resources": { + orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000, + orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000}, + "required": [ + "CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_DIRECT" + ] + }, + ], + "same_subtree": [uuids.g1], + } + + def _assert_placement_updated(self, server, ports): + rsp = self.placement.get( + '/allocations/%s' % server['id'], + version=1.28).body + + allocations = rsp['allocations'] + + # we expect one allocation for the compute resources, one on the OVS + # bridge RP due to bandwidth and one on the OVS agent RP due to packet + # rate request + self.assertEqual(3, len(allocations)) + self.assertEqual( + self._resources_from_flavor(self.flavor), + allocations[self.compute1_rp_uuid]['resources']) + + self.assertEqual(server['tenant_id'], rsp['project_id']) + self.assertEqual(server['user_id'], rsp['user_id']) + + ovs_bridge_allocations = allocations[ + self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]]['resources'] + ovs_agent_allocations = allocations[ + self.ovs_agent_rp_per_host[self.compute1_rp_uuid]]['resources'] + + total_bandwidth_request = collections.defaultdict(int) + total_packet_rate_request = collections.defaultdict(int) + for port in ports: + res_req = (port.get(constants.RESOURCE_REQUEST) or {}) + for group in res_req.get(constants.REQUEST_GROUPS): + port_request = group['resources'] + for rc, amount in port_request.items(): + if rc == orc.NET_PACKET_RATE_KILOPACKET_PER_SEC: + total_packet_rate_request[rc] += amount + else: + total_bandwidth_request[rc] += amount + + self.assertEqual(total_bandwidth_request, ovs_bridge_allocations) + self.assertEqual(total_packet_rate_request, ovs_agent_allocations) + + def _assert_port_updated(self, port_uuid): + updated_port = self.neutron.show_port(port_uuid)['port'] + binding_profile = updated_port.get('binding:profile', {}) + self.assertEqual( + { + getattr(uuids, port_uuid + "group1"): + self.ovs_bridge_rp_per_host[self.compute1_rp_uuid], + getattr(uuids, port_uuid + "group2"): + self.ovs_agent_rp_per_host[self.compute1_rp_uuid], + }, + binding_profile['allocation'] + ) class TestNovaManagePlacementSyncAggregates( diff --git a/nova/tests/unit/cmd/test_manage.py b/nova/tests/unit/cmd/test_manage.py index 15aaeece03c4..cecefa30af0d 100644 --- a/nova/tests/unit/cmd/test_manage.py +++ b/nova/tests/unit/cmd/test_manage.py @@ -2780,6 +2780,10 @@ class TestNovaManagePlacement(test.NoDBTestCase): self.output.getvalue()) self.assertIn("Conflict!", self.output.getvalue()) + @mock.patch( + 'nova.network.neutron.API.has_extended_resource_request_extension', + new=mock.Mock(return_value=False), + ) def test_has_request_but_no_allocation(self): # False because there is a full resource_request and allocation set. self.assertFalse( @@ -2795,7 +2799,10 @@ class TestNovaManagePlacement(test.NoDBTestCase): ] }, 'binding:profile': {'allocation': uuidsentinel.rp1} - })) + }, + mock.sentinel.neutron, + ) + ) # True because there is a full resource_request but no allocation set. self.assertTrue( self.cli._has_request_but_no_allocation( @@ -2810,7 +2817,10 @@ class TestNovaManagePlacement(test.NoDBTestCase): ] }, 'binding:profile': {} - })) + }, + mock.sentinel.neutron, + ) + ) # True because there is a full resource_request but no allocation set. self.assertTrue( self.cli._has_request_but_no_allocation( @@ -2825,73 +2835,89 @@ class TestNovaManagePlacement(test.NoDBTestCase): ] }, 'binding:profile': None, - })) - # False because there are no resources in the resource_request. 
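# Illustrative sketch, not part of the patch: the mocking pattern the unit
# tests above rely on. Pinning has_extended_resource_request_extension with
# mock.patch(..., new=mock.Mock(return_value=...)) keeps the check from
# calling Neutron and lets the same _has_request_but_no_allocation() code be
# exercised once per format. The Extensions class below is a toy stand-in for
# nova.network.neutron.API.

from unittest import mock


class Extensions:
    def has_extended_resource_request_extension(self):
        raise NotImplementedError('would query Neutron in real code')


@mock.patch.object(
    Extensions, 'has_extended_resource_request_extension',
    new=mock.Mock(return_value=False),
)
def check_legacy_behaviour():
    # with new= the patched attribute is a plain Mock and the decorated
    # function does not receive it as an extra argument
    assert Extensions().has_extended_resource_request_extension() is False


check_legacy_behaviour()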
- self.assertFalse( - self.cli._has_request_but_no_allocation( - { - 'id': uuidsentinel.empty_resources, - 'resource_request': { - 'resources': {}, - 'required': [ - 'CUSTOM_VNIC_TYPE_NORMAL' - ] - }, - 'binding:profile': {} - })) - # False because there are no resources in the resource_request. - self.assertFalse( - self.cli._has_request_but_no_allocation( - { - 'id': uuidsentinel.missing_resources, - 'resource_request': { - 'required': [ - 'CUSTOM_VNIC_TYPE_NORMAL' - ] - }, - 'binding:profile': {} - })) - # False because there are no required traits in the resource_request. - self.assertFalse( - self.cli._has_request_but_no_allocation( - { - 'id': uuidsentinel.empty_required, - 'resource_request': { - 'resources': { - 'NET_BW_EGR_KILOBIT_PER_SEC': 1000, - }, - 'required': [] - }, - 'binding:profile': {} - })) - # False because there are no required traits in the resource_request. - self.assertFalse( - self.cli._has_request_but_no_allocation( - { - 'id': uuidsentinel.missing_required, - 'resource_request': { - 'resources': { - 'NET_BW_EGR_KILOBIT_PER_SEC': 1000, - }, - }, - 'binding:profile': {} - })) - # False because there are no resources or required traits in the - # resource_request. - self.assertFalse( - self.cli._has_request_but_no_allocation( - { - 'id': uuidsentinel.empty_resource_request, - 'resource_request': {}, - 'binding:profile': {} - })) + }, + mock.sentinel.neutron, + ) + ) # False because there is no resource_request. self.assertFalse( self.cli._has_request_but_no_allocation( { 'id': uuidsentinel.missing_resource_request, 'binding:profile': {} - })) + }, + mock.sentinel.neutron, + ) + ) + + @mock.patch( + 'nova.network.neutron.API.has_extended_resource_request_extension', + new=mock.Mock(return_value=True), + ) + def test_has_request_but_no_allocation_extended(self): + # False because there is resource_request and allocation set. + self.assertFalse( + self.cli._has_request_but_no_allocation( + { + 'id': uuidsentinel.healed, + 'resource_request': { + 'request_groups': [ + { + 'id': uuidsentinel.group1, + 'resources': { + 'NET_BW_EGR_KILOBIT_PER_SEC': 1000, + }, + 'required': [ + 'CUSTOM_VNIC_TYPE_NORMAL' + ] + }, + ], + }, + 'binding:profile': { + 'allocation': {uuidsentinel.group1: uuidsentinel.rp1} + } + }, + mock.sentinel.neutron, + ) + ) + # False because there no resource_request + self.assertFalse( + self.cli._has_request_but_no_allocation( + { + 'id': uuidsentinel.healed, + 'resource_request': None, + 'binding:profile': { + 'allocation': {uuidsentinel.group1: uuidsentinel.rp1} + } + }, + mock.sentinel.neutron, + ) + ) + # True because we have request but no allocation set + self.assertTrue( + self.cli._has_request_but_no_allocation( + { + 'id': uuidsentinel.healed, + 'resource_request': { + 'request_groups': [ + { + 'id': uuidsentinel.group1, + 'resources': { + 'NET_BW_EGR_KILOBIT_PER_SEC': 1000, + }, + 'required': [ + 'CUSTOM_VNIC_TYPE_NORMAL' + ] + }, + ], + }, + 'binding:profile': { + 'allocation': {} + } + }, + mock.sentinel.neutron, + ) + ) def test_update_ports_only_updates_binding_profile(self): """Simple test to make sure that only the port's binding:profile is