[nova-manage] Support extended resource request

The nova-manage placement heal_allocations CLI is capable of healing
placement allocations that are missing due to port resource requests. To
support the new extended port resource request format, this code needs to
be adapted as well.

When the heal_allocations command gained port resource request support
in Train, the only way to figure out the missing allocations was to dig
into the placement RP tree directly. Since then nova has gained support
for interface attach with such ports, and to support that, placement
gained support for in_tree filtering in allocation candidate queries. So
now the healing logic can be generalized to the following:

For a given instance
1) Find the ports that have a resource request but no allocation key in
   the binding profile. These are the ports we need to heal.
2) Gather the RequestGroups from these ports and run an
   allocation_candidates query restricted to the current compute of the
   instance with in_tree filtering.
3) Extend the existing instance allocation with a returned allocation
   candidate and update the instance allocation in placement.
4) Update the binding profile of these ports in neutron.

The main change compared to the existing implementation is in step 2);
the rest is mostly the same. A condensed sketch of the overall flow is
included below.
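
To make the steps above concrete, here is a condensed, self-contained
Python sketch of the generalized flow. It uses plain dicts and
hypothetical helper names (heal_port_allocations_for_instance, a
get_allocation_candidates callable, a candidate modelled as an
(allocations, mappings) pair); it illustrates the idea and is not the
code added by this change:

    # Condensed, illustrative sketch of the flow above. All data shapes and
    # helper names are simplified and hypothetical; this is not nova's code.

    def merge_allocations(alloc1, alloc2):
        """Merge two placement-style allocation dicts (RP UUID -> resources)."""
        merged = {
            rp: {'resources': dict(alloc['resources'])}
            for rp, alloc in alloc1.items()
        }
        for rp, alloc in alloc2.items():
            resources = merged.setdefault(rp, {'resources': {}})['resources']
            for rc, amount in alloc['resources'].items():
                resources[rc] = resources.get(rc, 0) + amount
        return merged


    def heal_port_allocations_for_instance(
            instance_allocations, ports, compute_rp_uuid,
            get_allocation_candidates):
        # 1) ports with a resource request but no allocation key in the
        #    binding profile are the ones that need healing
        to_heal = [
            port for port in ports
            if port.get('resource_request')
            and not (port.get('binding:profile') or {}).get('allocation')
        ]
        if not to_heal:
            return instance_allocations, []

        # 2) gather the request groups of every port (the extended format
        #    is assumed here) and run a single allocation candidate query
        #    restricted to the instance's compute RP tree
        groups_per_port = {
            port['id']: port['resource_request']['request_groups']
            for port in to_heal
        }
        all_groups = [g for groups in groups_per_port.values() for g in groups]
        candidates = get_allocation_candidates(
            all_groups, in_tree=compute_rp_uuid)
        if len(candidates) != 1:
            raise RuntimeError('no candidate, or ambiguous candidates')
        # a candidate is assumed to be an (allocations, mappings) pair where
        # mappings maps each group id to a list of RP UUIDs
        allocations, mappings = candidates[0]

        # 3) extend the existing instance allocation with the candidate
        healed = merge_allocations(instance_allocations, allocations)

        # 4) record the request group -> RP mapping in the binding profile;
        #    this is sent back to neutron before placement is updated
        for port in to_heal:
            profile = port.get('binding:profile') or {}
            profile['allocation'] = {
                group['id']: mappings[group['id']][0]
                for group in groups_per_port[port['id']]
            }
            port['binding:profile'] = profile
        return healed, to_heal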

Note that support for the old resource request format is kept alongside
the new resource request format until Neutron makes the new format
mandatory.
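
For reference, the two port resource request formats, and the
corresponding value of the 'allocation' key in the port binding profile,
look roughly as follows. The shapes mirror the test fixtures further down
in this change; the UUID strings and amounts are made-up placeholders:

    # Illustrative examples only; UUID strings and amounts are placeholders.

    # Legacy format: one implicit resource group per port; the healed
    # binding profile records a single resource provider UUID.
    legacy_port = {
        'id': 'port-1',
        'resource_request': {
            'resources': {
                'NET_BW_IGR_KILOBIT_PER_SEC': 1000,
                'NET_BW_EGR_KILOBIT_PER_SEC': 1000,
            },
            'required': ['CUSTOM_PHYSNET2', 'CUSTOM_VNIC_TYPE_NORMAL'],
        },
        'binding:profile': {'allocation': 'ovs-bridge-rp-uuid'},
    }

    # Extended format (port-resource-request-groups extension): multiple
    # groups per port; the healed binding profile maps each group id to
    # the resource provider fulfilling that group.
    extended_port = {
        'id': 'port-2',
        'resource_request': {
            'request_groups': [
                {
                    'id': 'group-1',
                    'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
                    'required': ['CUSTOM_PHYSNET2', 'CUSTOM_VNIC_TYPE_NORMAL'],
                },
                {
                    'id': 'group-2',
                    'resources': {'NET_PACKET_RATE_KILOPACKET_PER_SEC': 1000},
                    'required': ['CUSTOM_VNIC_TYPE_NORMAL'],
                },
            ],
            'same_subtree': ['group-1', 'group-2'],
        },
        'binding:profile': {
            'allocation': {
                'group-1': 'ovs-bridge-rp-uuid',
                'group-2': 'ovs-agent-rp-uuid',
            },
        },
    }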

blueprint: qos-minimum-guaranteed-packet-rate

Change-Id: I58869d2a5a4ed988fc786a6f1824be441dd48484
Balazs Gibizer 2021-07-23 18:13:37 +02:00
parent e14eef0719
commit 90ed7e574d
5 changed files with 550 additions and 304 deletions

View File

@@ -1224,6 +1224,13 @@ This command requires that the
Added :option:`--force` option.
.. versionchanged:: 25.0.0 (Yoga)
Added support for healing port allocations if port-resource-request-groups
neutron API extension is enabled and therefore ports can request multiple
groups of resources, e.g. by using both guaranteed minimum bandwidth and
guaranteed minimum packet rate QoS policy rules.
.. rubric:: Options
.. option:: --max-count <max_count>

View File

@@ -28,6 +28,7 @@ import re
import sys
import time
import traceback
import typing as ty
from urllib import parse as urlparse
from dateutil import parser as dateutil_parser
@@ -1458,48 +1459,13 @@ class PlacementCommands(object):
instance_uuid=instance.uuid, error=str(e))
@staticmethod
def _has_request_but_no_allocation(port):
request = port.get(constants.RESOURCE_REQUEST)
def _has_request_but_no_allocation(port, neutron):
has_res_req = neutron_api.API()._has_resource_request(
context.get_admin_context(), port, neutron)
binding_profile = neutron_api.get_binding_profile(port)
allocation = binding_profile.get(constants.ALLOCATION)
# We are defensive here about 'resources' and 'required' in the
# 'resource_request' as neutron API is not clear about those fields
# being optional.
return (request and request.get('resources') and
request.get('required') and
not allocation)
@staticmethod
def _get_rps_in_tree_with_required_traits(
ctxt, rp_uuid, required_traits, placement):
"""Find the RPs that have all the required traits in the given rp tree.
:param ctxt: nova.context.RequestContext
:param rp_uuid: the RP uuid that will be used to query the tree.
:param required_traits: the traits that need to be supported by
the returned resource providers.
:param placement: nova.scheduler.client.report.SchedulerReportClient
to communicate with the Placement service API.
:raise PlacementAPIConnectFailure: if placement API cannot be reached
:raise ResourceProviderRetrievalFailed: if the resource provider does
not exist.
:raise ResourceProviderTraitRetrievalFailed: if resource provider
trait information cannot be read from placement.
:return: A list of RP UUIDs that supports every required traits and
in the tree for the provider rp_uuid.
"""
try:
rps = placement.get_providers_in_tree(ctxt, rp_uuid)
matching_rps = [
rp['uuid']
for rp in rps
if set(required_traits).issubset(
placement.get_provider_traits(ctxt, rp['uuid']).traits)
]
except ks_exc.ClientException:
raise exception.PlacementAPIConnectFailure()
return matching_rps
return has_res_req and not allocation
@staticmethod
def _merge_allocations(alloc1, alloc2):
@@ -1525,67 +1491,105 @@ class PlacementCommands(object):
allocations[rp_uuid]['resources'][rc] += amount
return allocations
def _get_port_allocation(
self, ctxt, node_uuid, port, instance_uuid, placement):
"""Return the extra allocation the instance needs due to the given
port.
@staticmethod
def _get_resource_request_from_ports(
ctxt: context.RequestContext,
ports: ty.List[ty.Dict[str, ty.Any]]
) -> ty.Tuple[
ty.Dict[str, ty.List['objects.RequestGroup']],
'objects.RequestLevelParams']:
"""Collect RequestGroups and RequestLevelParams for all ports
:param ctxt: nova.context.RequestContext
:param node_uuid: the ComputeNode uuid the instance is running on.
:param port: the port dict returned from neutron
:param instance_uuid: The uuid of the instance the port is bound to
:param placement: nova.scheduler.client.report.SchedulerReportClient
to communicate with the Placement service API.
:raise PlacementAPIConnectFailure: if placement API cannot be reached
:raise ResourceProviderRetrievalFailed: compute node resource provider
does not exist.
:raise ResourceProviderTraitRetrievalFailed: if resource provider
trait information cannot be read from placement.
:raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided
unambiguously which resource provider to heal from.
:raise NoResourceProviderToHealFrom: if there is no resource provider
found to heal from.
:return: A dict of resources keyed by RP uuid to be included in the
instance allocation dict.
:param ctxt: the request context
:param ports: a list of port dicts
:returns: A two tuple where the first item is a dict mapping port
uuids to a list of request groups coming from that port, and the
second item is a combined RequestLevelParams object from all ports.
"""
matching_rp_uuids = self._get_rps_in_tree_with_required_traits(
ctxt, node_uuid, port[constants.RESOURCE_REQUEST]['required'],
placement)
groups = {}
request_level_params = objects.RequestLevelParams()
extended_res_req = (
neutron_api.API().has_extended_resource_request_extension(
ctxt)
)
if len(matching_rp_uuids) > 1:
# If there is more than one such RP then it is an ambiguous
# situation that we cannot handle here efficiently because that
# would require the reimplementation of most of the allocation
# candidate query functionality of placement. Also if more
# than one such RP exists then selecting the right one might
# need extra information from the compute node. For example
# which PCI PF the VF is allocated from and which RP represents
# that PCI PF in placement. When migration is supported with such
# servers then we can ask the admin to migrate these servers
# instead to heal their allocation.
raise exception.MoreThanOneResourceProviderToHealFrom(
rp_uuids=','.join(matching_rp_uuids),
port_id=port['id'],
instance_uuid=instance_uuid)
for port in ports:
resource_request = port.get(constants.RESOURCE_REQUEST)
if extended_res_req:
groups[port['id']] = (
objects.RequestGroup.from_extended_port_request(
ctxt, resource_request
)
)
request_level_params.extend_with(
objects.RequestLevelParams.from_port_request(
resource_request
)
)
else:
# This is the legacy format, only one group per port and no
# request level param support
# TODO(gibi): remove this path once the extended resource
# request extension is mandatory in neutron
groups[port['id']] = [
objects.RequestGroup.from_port_request(
ctxt, port['id'], resource_request
)
]
if len(matching_rp_uuids) == 0:
raise exception.NoResourceProviderToHealFrom(
port_id=port['id'],
instance_uuid=instance_uuid,
traits=port[constants.RESOURCE_REQUEST]['required'],
node_uuid=node_uuid)
return groups, request_level_params
# We found one RP that matches the traits. Assume that we can allocate
# the resources from it. If there is not enough inventory left on the
# RP then the PUT /allocations placement call will detect that.
rp_uuid = matching_rp_uuids[0]
@staticmethod
def _get_port_binding_profile_allocation(
ctxt: context.RequestContext,
neutron: neutron_api.ClientWrapper,
port: ty.Dict[str, ty.Any],
request_groups: ty.List['objects.RequestGroup'],
resource_provider_mapping: ty.Dict[str, ty.List[str]],
) -> ty.Dict[str, str]:
"""Generate the value of the allocation key of the port binding profile
based on the provider mapping returned from placement
port_allocation = {
rp_uuid: {
'resources': port[constants.RESOURCE_REQUEST]['resources']
:param ctxt: the request context
:param neutron: the neutron client
:param port: the port dict from neutron
:param request_groups: the list of RequestGroups object generated from
the port resource request
:param resource_provider_mapping: The dict of request group to resource
provider mapping returned by the Placement allocation candidate
query
:returns: a dict mapping request group ids to resource provider uuids
in the form Neutron expects in the port binding profile.
"""
if neutron_api.API().has_extended_resource_request_extension(
ctxt, neutron
):
# The extended resource request format also means that a
# port can have more than one request group.
# Each request group id from the port needs to be mapped to
# a single provider id from the provider mappings. Each
# group from the port is mapped to a numbered request group
# in placement so we can assume that they are mapped to
# a single provider and therefore the provider mapping list
# has a single provider id.
allocation = {
group.requester_id: resource_provider_mapping[
group.requester_id][0]
for group in request_groups
}
}
return port_allocation
else:
# This is the legacy resource request format where a port
# is mapped to a single request group
# NOTE(gibi): In the resource provider mapping there can be
# more than one RP fulfilling a request group. But resource
# request of a Neutron port is always mapped to a
# numbered request group that is always fulfilled by one
# resource provider. So we only pass that single RP UUID
# here.
allocation = resource_provider_mapping[
port['id']][0]
return allocation
def _get_port_allocations_to_heal(
self, ctxt, instance, node_cache, placement, neutron, output):
@@ -1604,15 +1608,10 @@ class PlacementCommands(object):
:raise nova.exception.ComputeHostNotFound: if compute node of the
instance not found in the db.
:raise PlacementAPIConnectFailure: if placement API cannot be reached
:raise ResourceProviderRetrievalFailed: if the resource provider
representing the compute node the instance is running on does not
exist.
:raise ResourceProviderTraitRetrievalFailed: if resource provider
trait information cannot be read from placement.
:raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided
unambiguously which resource provider to heal from.
:raise NoResourceProviderToHealFrom: if there is no resource provider
found to heal from.
:raise AllocationUpdateFailed: if there is either no allocation
candidate returned from placement for the missing port allocations
or there is more than one candidate, making the healing
ambiguous.
:return: A two tuple where the first item is a dict of resources keyed
by RP uuid to be included in the instance allocation dict. The
second item is a list of port dicts to be updated in Neutron.
@@ -1629,7 +1628,7 @@ class PlacementCommands(object):
# are not on any host.
ports_to_heal = [
port for port in self._get_ports(ctxt, instance, neutron)
if self._has_request_but_no_allocation(port)]
if self._has_request_but_no_allocation(port, neutron)]
if not ports_to_heal:
# nothing to do, return early
@@ -1638,26 +1637,107 @@ class PlacementCommands(object):
node_uuid = self._get_compute_node_uuid(
ctxt, instance, node_cache)
allocations = {}
# NOTE(gibi): We need to handle both legacy and extended resource
# request. So we need to handle ports with multiple request groups
# allocating from multiple providers.
# The logic what we follow here is pretty similar to the logic
# implemented in ComputeManager._allocate_port_resource_for_instance
# for the interface attach case. We just apply it to more than one
# port here.
request_groups_per_port, req_lvl_params = (
self._get_resource_request_from_ports(ctxt, ports_to_heal)
)
# flatten the list of list of groups
request_groups = [
group
for groups in request_groups_per_port.values()
for group in groups
]
# We can have multiple request groups. It would be enough to restrict
# only one of them to the compute tree, but for symmetry we restrict
# all of them.
for request_group in request_groups:
request_group.in_tree = node_uuid
# If there are multiple groups then the group_policy is mandatory in
# the allocation candidate query. We can assume that if this instance
# booted successfully then we have the policy in the flavor. If there
# is only one group and therefore no policy then the value of the
# policy in the allocation candidate query is ignored, so we simply
# default it here.
group_policy = instance.flavor.extra_specs.get("group_policy", "none")
rr = scheduler_utils.ResourceRequest.from_request_groups(
request_groups, req_lvl_params, group_policy)
res = placement.get_allocation_candidates(ctxt, rr)
# NOTE(gibi): the get_allocation_candidates method has the
# @safe_connect decorator applied. Such a decorator returns None
# if the connection to Placement fails, so we raise an exception
# here. When Placement successfully returns a response, even if it
# is a negative or empty response, the method returns a three
# tuple. That case is handled a couple of lines below.
if not res:
raise exception.PlacementAPIConnectFailure()
alloc_reqs, __, __ = res
if not alloc_reqs:
port_ids = [port['id'] for port in ports_to_heal]
raise exception.AllocationUpdateFailed(
consumer_uuid=instance.uuid,
error=f'Placement returned no allocation candidate to fulfill '
f'the resource request of the port(s) {port_ids}'
)
if len(alloc_reqs) > 1:
# If there is more than one candidate then it is an ambiguous
# situation that we cannot handle here because selecting the right
# one might need extra information from the compute node. For
# example which PCI PF the VF is allocated from and which RP
# represents that PCI PF in placement.
# TODO(gibi): One way to get that missing information to resolve
# ambiguity would be to load up the InstancePciRequest objects and
# try to use the parent_if_name in their spec to find the proper
# candidate that allocates for the same port from the PF RP that
# has the same name.
port_ids = [port['id'] for port in ports_to_heal]
raise exception.AllocationUpdateFailed(
consumer_uuid=instance.uuid,
error=f'Placement returned more than one possible allocation '
f'candidates to fulfill the resource request of the '
f'port(s) {port_ids}. This script does not have enough '
f'information to select the proper candidate to heal the '
f'missing allocations. A possible way to heal the '
f'allocation of this instance is to migrate it to '
f'another compute as the migration process re-creates '
f'the full allocation on the target host.'
)
# So we have one candidate, let's use that to get the needed allocations
# and the provider mapping for the ports' binding profile
alloc_req = alloc_reqs[0]
allocations = alloc_req["allocations"]
provider_mappings = alloc_req["mappings"]
for port in ports_to_heal:
port_allocation = self._get_port_allocation(
ctxt, node_uuid, port, instance.uuid, placement)
rp_uuid = list(port_allocation)[0]
allocations = self._merge_allocations(
allocations, port_allocation)
# We also need to record the RP we are allocated from in the
# We also need to record the RPs we are allocated from in the
# port. This will be sent back to Neutron before the allocation
# is updated in placement
profile_allocation = self._get_port_binding_profile_allocation(
ctxt, neutron, port, request_groups_per_port[port['id']],
provider_mappings
)
binding_profile = neutron_api.get_binding_profile(port)
binding_profile[constants.ALLOCATION] = rp_uuid
binding_profile[constants.ALLOCATION] = profile_allocation
port[constants.BINDING_PROFILE] = binding_profile
output(_("Found resource provider %(rp_uuid)s having matching "
"traits for port %(port_uuid)s with resource request "
"%(request)s attached to instance %(instance_uuid)s") %
{"rp_uuid": rp_uuid, "port_uuid": port["id"],
"request": port.get(constants.RESOURCE_REQUEST),
"instance_uuid": instance.uuid})
output(_(
"Found a request group : resource provider mapping "
"%(mapping)s for the port %(port_uuid)s with resource request "
"%(request)s attached to the instance %(instance_uuid)s") %
{"mapping": profile_allocation, "port_uuid": port['id'],
"request": port.get(constants.RESOURCE_REQUEST),
"instance_uuid": instance.uuid}
)
return allocations, ports_to_heal
@@ -1791,15 +1871,6 @@ class PlacementCommands(object):
a given instance with consumer project/user information
:raise UnableToQueryPorts: If the neutron list ports query fails.
:raise PlacementAPIConnectFailure: if placement API cannot be reached
:raise ResourceProviderRetrievalFailed: if the resource provider
representing the compute node the instance is running on does not
exist.
:raise ResourceProviderTraitRetrievalFailed: if resource provider
trait information cannot be read from placement.
:raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided
unambiguously which resource provider to heal from.
:raise NoResourceProviderToHealFrom: if there is no resource provider
found to heal from.
:raise UnableToUpdatePorts: if a port update failed in neutron but any
partial update was rolled back successfully.
:raise UnableToRollbackPortUpdates: if a port update failed in neutron
@@ -1956,15 +2027,6 @@ class PlacementCommands(object):
a given instance with consumer project/user information
:raise UnableToQueryPorts: If the neutron list ports query fails.
:raise PlacementAPIConnectFailure: if placement API cannot be reached
:raise ResourceProviderRetrievalFailed: if the resource provider
representing the compute node the instance is running on does not
exist.
:raise ResourceProviderTraitRetrievalFailed: if resource provider
trait information cannot be read from placement.
:raise MoreThanOneResourceProviderToHealFrom: if it cannot be decided
unambiguously which resource provider to heal from.
:raise NoResourceProviderToHealFrom: if there is no resource provider
found to heal from.
:raise UnableToUpdatePorts: if a port update failed in neutron but any
partial update was rolled back successfully.
:raise UnableToRollbackPortUpdates: if a port update failed in neutron
@@ -2186,13 +2248,11 @@ class PlacementCommands(object):
except exception.ComputeHostNotFound as e:
print(e.format_message())
return 2
except (exception.AllocationCreateFailed,
exception.AllocationUpdateFailed,
exception.NoResourceProviderToHealFrom,
exception.MoreThanOneResourceProviderToHealFrom,
exception.PlacementAPIConnectFailure,
exception.ResourceProviderRetrievalFailed,
exception.ResourceProviderTraitRetrievalFailed) as e:
except (
exception.AllocationCreateFailed,
exception.AllocationUpdateFailed,
exception.PlacementAPIConnectFailure
) as e:
print(e.format_message())
return 3
except exception.UnableToQueryPorts as e:
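
As an illustration of the legacy/extended branching implemented above
(see _get_resource_request_from_ports and
_get_port_binding_profile_allocation), the following standalone sketch
shows the two shapes side by side. It uses plain dicts and hypothetical
helper names rather than nova's RequestGroup objects:

    # Standalone sketch of the legacy/extended branching; plain dicts only.

    def request_groups_from_port(port, extended_enabled):
        """Return a list of request-group dicts for one port."""
        resource_request = port.get('resource_request') or {}
        if extended_enabled:
            # extended format: the port already carries explicit groups
            return list(resource_request.get('request_groups', []))
        # legacy format: a single implicit group identified by the port id
        return [{
            'id': port['id'],
            'resources': resource_request.get('resources', {}),
            'required': resource_request.get('required', []),
        }]


    def binding_profile_allocation(port, groups, provider_mappings,
                                   extended_enabled):
        """Return the value for the 'allocation' key of the binding profile."""
        if extended_enabled:
            # one RP per numbered group, keyed by the group id
            return {g['id']: provider_mappings[g['id']][0] for g in groups}
        # legacy format: a single RP UUID for the whole port
        return provider_mappings[port['id']][0]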

View File

@@ -2291,28 +2291,6 @@ class HealPortAllocationException(NovaException):
msg_fmt = _("Healing port allocation failed.")
class MoreThanOneResourceProviderToHealFrom(HealPortAllocationException):
msg_fmt = _("More than one matching resource provider %(rp_uuids)s is "
"available for healing the port allocation for port "
"%(port_id)s for instance %(instance_uuid)s. This script "
"does not have enough information to select the proper "
"resource provider from which to heal.")
class NoResourceProviderToHealFrom(HealPortAllocationException):
msg_fmt = _("No matching resource provider is "
"available for healing the port allocation for port "
"%(port_id)s for instance %(instance_uuid)s. There are no "
"resource providers with matching traits %(traits)s in the "
"provider tree of the resource provider %(node_uuid)s ."
"This probably means that the neutron QoS configuration is "
"wrong. Consult with "
"https://docs.openstack.org/neutron/latest/admin/"
"config-qos-min-bw.html for information on how to configure "
"neutron. If the configuration is fixed the script can be run "
"again.")
class UnableToQueryPorts(HealPortAllocationException):
msg_fmt = _("Unable to query ports for instance %(instance_uuid)s: "
"%(error)s")

View File

@@ -1,4 +1,5 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
@@ -20,7 +21,7 @@ import mock
from neutronclient.common import exceptions as neutron_client_exc
import os_resource_classes as orc
from oslo_serialization import jsonutils
from oslo_utils.fixture import uuidsentinel
from oslo_utils.fixture import uuidsentinel as uuids
from oslo_utils import timeutils
from nova.cmd import manage
@@ -33,7 +34,7 @@ from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.functional import test_servers_resource_request
from nova.tests.functional import test_servers_resource_request as test_res_req
from nova import utils as nova_utils
CONF = config.CONF
@@ -1063,7 +1064,8 @@ class TestNovaManagePlacementHealAllocations(
class TestNovaManagePlacementHealPortAllocations(
test_servers_resource_request.PortResourceRequestBasedSchedulingTestBase):
test_res_req.PortResourceRequestBasedSchedulingTestBase
):
def setUp(self):
super(TestNovaManagePlacementHealPortAllocations, self).setUp()
@@ -1087,15 +1089,17 @@ class TestNovaManagePlacementHealPortAllocations(
bound_port = self.neutron._ports[port_id]
bound_port[constants.RESOURCE_REQUEST] = resource_request
@staticmethod
def _get_default_resource_request(port_uuid):
return {
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": ["CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"]
}
def _create_server_with_missing_port_alloc(
self, ports, resource_request=None):
if not resource_request:
resource_request = {
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": ["CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"]
}
server = self._create_server(
flavor=self.flavor,
@@ -1105,14 +1109,22 @@ class TestNovaManagePlacementHealPortAllocations(
# This is a hack to simulate that we have a server that is missing
# allocation for its port
for port in ports:
self._add_resource_request_to_a_bound_port(
port['id'], resource_request)
if not resource_request:
rr = self._get_default_resource_request(port['id'])
else:
rr = resource_request
self._add_resource_request_to_a_bound_port(port['id'], rr)
updated_ports = [
self.neutron.show_port(port['id'])['port'] for port in ports]
return server, updated_ports
def _get_resource_request(self, port):
res_req = port[constants.RESOURCE_REQUEST]
return res_req
def _assert_placement_updated(self, server, ports):
rsp = self.placement.get(
'/allocations/%s' % server['id'],
@@ -1137,7 +1149,7 @@ class TestNovaManagePlacementHealPortAllocations(
# bridge RP
total_request = collections.defaultdict(int)
for port in ports:
port_request = port[constants.RESOURCE_REQUEST]['resources']
port_request = self._get_resource_request(port)['resources']
for rc, amount in port_request.items():
total_request[rc] += amount
self.assertEqual(total_request, network_allocations)
@@ -1362,83 +1374,64 @@ class TestNovaManagePlacementHealPortAllocations(
'Successfully created allocations for instance', output)
self.assertEqual(0, result)
def test_heal_port_allocation_not_enough_resources_for_port(self):
"""Test that a port needs allocation but not enough inventory
available.
"""
@staticmethod
def _get_too_big_resource_request():
# The port will request too much NET_BW_IGR_KILOBIT_PER_SEC so there is
# no RP on the host that can provide it.
resource_request = {
return {
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 100000000000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": ["CUSTOM_PHYSNET2",
"CUSTOM_VNIC_TYPE_NORMAL"]
}
def test_heal_port_allocation_not_enough_resources_for_port(self):
"""Test that a port needs allocation but not enough inventory
available.
"""
server, ports = self._create_server_with_missing_port_alloc(
[self.neutron.port_1], resource_request)
[self.neutron.port_1], self._get_too_big_resource_request())
# let's trigger a heal
result = self.cli.heal_allocations(verbose=True, max_count=2)
self._assert_placement_not_updated(server)
# Actually the ports were updated but the update is rolled back when
# the placement update failed
self._assert_ports_not_updated(ports)
output = self.output.getvalue()
self.assertIn(
'Rolling back port update',
output)
self.assertIn(
'Failed to update allocations for consumer',
'Placement returned no allocation candidate',
output)
self.assertEqual(3, result)
def test_heal_port_allocation_no_rp_providing_required_traits(self):
"""Test that a port needs allocation but no rp is providing the
required traits.
"""
# The port will request a trait, CUSTOM_PHYSNET_NONEXISTENT that will
# not be provided by any RP on this host
resource_request = {
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": ["CUSTOM_PHYSNET_NONEXISTENT",
"CUSTOM_VNIC_TYPE_NORMAL"]
}
server, ports = self._create_server_with_missing_port_alloc(
[self.neutron.port_1], resource_request)
# let's trigger a heal
result = self.cli.heal_allocations(verbose=True, max_count=2)
self._assert_placement_not_updated(server)
self._assert_ports_not_updated(ports)
self.assertIn(
'No matching resource provider is available for healing the port '
'allocation',
self.output.getvalue())
self.assertEqual(3, result)
def test_heal_port_allocation_ambiguous_rps(self):
"""Test that there are more than one matching RPs are available on the
compute.
"""
# The port will request CUSTOM_VNIC_TYPE_DIRECT trait and there are
# two RPs that supports such trait.
resource_request = {
@staticmethod
def _get_ambiguous_resource_request():
return {
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": ["CUSTOM_PHYSNET2",
"CUSTOM_VNIC_TYPE_DIRECT"]
}
def test_heal_port_allocation_ambiguous_candidates(self):
"""Test that there are more than one matching set of RPs are available
on the compute.
"""
# Add bandwidth inventory for PF3 so that both PF2 and PF3 could
# support the port's request making the situation ambiguous
inventories = {
orc.NET_BW_IGR_KILOBIT_PER_SEC: {"total": 100000},
orc.NET_BW_EGR_KILOBIT_PER_SEC: {"total": 100000},
}
self._set_provider_inventories(
self.sriov_dev_rp_per_host[self.compute1_rp_uuid][self.PF3],
{"inventories": inventories}
)
server, ports = self._create_server_with_missing_port_alloc(
[self.neutron.port_1], resource_request)
[self.neutron.port_1], self._get_ambiguous_resource_request())
# let's trigger a heal
result = self.cli.heal_allocations(verbose=True, max_count=2)
@@ -1447,7 +1440,7 @@ class TestNovaManagePlacementHealPortAllocations(
self._assert_ports_not_updated(ports)
self.assertIn(
'More than one matching resource provider',
' Placement returned more than one possible allocation candidates',
self.output.getvalue())
self.assertEqual(3, result)
@@ -1595,12 +1588,18 @@ class TestNovaManagePlacementHealPortAllocations(
output)
self.assertEqual(7, result)
def _test_heal_port_allocation_placement_unavailable(
self, server, ports, error):
def test_heal_port_allocation_placement_unavailable_during_a_c(self):
server, ports = self._create_server_with_missing_port_alloc(
[self.neutron.port_1])
with mock.patch('nova.cmd.manage.PlacementCommands.'
'_get_rps_in_tree_with_required_traits',
side_effect=error):
# Simulate that placement is unavailable during the allocation
# candidate query. The safe_connect decorator signals that via
# returning None.
with mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.'
'get_allocation_candidates',
return_value=None
):
result = self.cli.heal_allocations(verbose=True, max_count=2)
self._assert_placement_not_updated(server)
@@ -1608,18 +1607,194 @@ class TestNovaManagePlacementHealPortAllocations(
self.assertEqual(3, result)
def test_heal_port_allocation_placement_unavailable(self):
def test_heal_port_allocation_placement_unavailable_during_update(self):
server, ports = self._create_server_with_missing_port_alloc(
[self.neutron.port_1])
for error in [
exception.PlacementAPIConnectFailure(),
exception.ResourceProviderRetrievalFailed(uuid=uuidsentinel.rp1),
exception.ResourceProviderTraitRetrievalFailed(
uuid=uuidsentinel.rp1)]:
# Simulate that placement is unavailable during updating the
# allocation. The retry decorator signals that via returning False
with mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.'
'put_allocations',
return_value=False
):
result = self.cli.heal_allocations(verbose=True, max_count=2)
self._test_heal_port_allocation_placement_unavailable(
server, ports, error)
self._assert_placement_not_updated(server)
# Actually there was a port update but that was rolled back when
# the allocation update failed
self._assert_ports_not_updated(ports)
output = self.output.getvalue()
self.assertIn(
'Rolling back port update',
output)
self.assertEqual(3, result)
class TestNovaManagePlacementHealPortAllocationsExtended(
TestNovaManagePlacementHealPortAllocations
):
"""Run the same tests as in TestNovaManagePlacementHealPortAllocations but
with the ExtendedResourceRequestNeutronFixture that automatically
translates the resource request in the ports used by these tests to the
extended format. Note that this will test the extended format handling but
only with a single request group per port.
"""
def setUp(self):
super().setUp()
self.neutron = self.useFixture(
test_res_req.ExtendedResourceRequestNeutronFixture(self))
def _get_resource_request(self, port):
# we assume a single resource request group in this test class
res_req = port[constants.RESOURCE_REQUEST][constants.REQUEST_GROUPS]
assert len(res_req) == 1
return res_req[0]
def _assert_port_updated(self, port_uuid):
updated_port = self.neutron.show_port(port_uuid)['port']
binding_profile = updated_port.get('binding:profile', {})
self.assertEqual(
{port_uuid: self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]},
binding_profile['allocation'])
class TestNovaManagePlacementHealPortAllocationsMultiGroup(
TestNovaManagePlacementHealPortAllocations
):
"""Run the same tests as in TestNovaManagePlacementHealPortAllocations but
with the MultiGroupResourceRequestNeutronFixture to test with extended
resource request with multiple groups.
"""
def setUp(self):
super().setUp()
self.neutron = self.useFixture(
test_res_req.MultiGroupResourceRequestNeutronFixture(self))
@staticmethod
def _get_default_resource_request(port_uuid):
# we need unique uuids per port for multi port tests
g1 = getattr(uuids, port_uuid + "group1")
g2 = getattr(uuids, port_uuid + "group2")
return {
"request_groups": [
{
"id": g1,
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": [
"CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"
]
},
{
"id": g2,
"resources": {
orc.NET_PACKET_RATE_KILOPACKET_PER_SEC: 1000
},
"required": ["CUSTOM_VNIC_TYPE_NORMAL"]
}
],
"same_subtree": [g1, g2],
}
@staticmethod
def _get_too_big_resource_request():
# The port will request too much NET_BW_IGR_KILOBIT_PER_SEC so there is
# no RP on the host that can provide it.
return {
"request_groups": [
{
"id": uuids.g1,
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 10000000000000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": [
"CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_NORMAL"
]
},
{
"id": uuids.g2,
"resources": {
orc.NET_PACKET_RATE_KILOPACKET_PER_SEC: 1000
},
"required": ["CUSTOM_VNIC_TYPE_NORMAL"]
}
],
"same_subtree": [uuids.g1, uuids.g2],
}
@staticmethod
def _get_ambiguous_resource_request():
# ambiguity cannot really be simulated with multiple groups as that
# would require multiple OVS bridges instead of multiple PFs. So
# falling back to a single group in this specific test case.
return {
"request_groups": [
{
"id": uuids.g1,
"resources": {
orc.NET_BW_IGR_KILOBIT_PER_SEC: 1000,
orc.NET_BW_EGR_KILOBIT_PER_SEC: 1000},
"required": [
"CUSTOM_PHYSNET2", "CUSTOM_VNIC_TYPE_DIRECT"
]
},
],
"same_subtree": [uuids.g1],
}
def _assert_placement_updated(self, server, ports):
rsp = self.placement.get(
'/allocations/%s' % server['id'],
version=1.28).body
allocations = rsp['allocations']
# we expect one allocation for the compute resources, one on the OVS
# bridge RP due to bandwidth and one on the OVS agent RP due to packet
# rate request
self.assertEqual(3, len(allocations))
self.assertEqual(
self._resources_from_flavor(self.flavor),
allocations[self.compute1_rp_uuid]['resources'])
self.assertEqual(server['tenant_id'], rsp['project_id'])
self.assertEqual(server['user_id'], rsp['user_id'])
ovs_bridge_allocations = allocations[
self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]]['resources']
ovs_agent_allocations = allocations[
self.ovs_agent_rp_per_host[self.compute1_rp_uuid]]['resources']
total_bandwidth_request = collections.defaultdict(int)
total_packet_rate_request = collections.defaultdict(int)
for port in ports:
res_req = (port.get(constants.RESOURCE_REQUEST) or {})
for group in res_req.get(constants.REQUEST_GROUPS):
port_request = group['resources']
for rc, amount in port_request.items():
if rc == orc.NET_PACKET_RATE_KILOPACKET_PER_SEC:
total_packet_rate_request[rc] += amount
else:
total_bandwidth_request[rc] += amount
self.assertEqual(total_bandwidth_request, ovs_bridge_allocations)
self.assertEqual(total_packet_rate_request, ovs_agent_allocations)
def _assert_port_updated(self, port_uuid):
updated_port = self.neutron.show_port(port_uuid)['port']
binding_profile = updated_port.get('binding:profile', {})
self.assertEqual(
{
getattr(uuids, port_uuid + "group1"):
self.ovs_bridge_rp_per_host[self.compute1_rp_uuid],
getattr(uuids, port_uuid + "group2"):
self.ovs_agent_rp_per_host[self.compute1_rp_uuid],
},
binding_profile['allocation']
)
class TestNovaManagePlacementSyncAggregates(

View File

@@ -2780,6 +2780,10 @@ class TestNovaManagePlacement(test.NoDBTestCase):
self.output.getvalue())
self.assertIn("Conflict!", self.output.getvalue())
@mock.patch(
'nova.network.neutron.API.has_extended_resource_request_extension',
new=mock.Mock(return_value=False),
)
def test_has_request_but_no_allocation(self):
# False because there is a full resource_request and allocation set.
self.assertFalse(
@@ -2795,7 +2799,10 @@ class TestNovaManagePlacement(test.NoDBTestCase):
]
},
'binding:profile': {'allocation': uuidsentinel.rp1}
}))
},
mock.sentinel.neutron,
)
)
# True because there is a full resource_request but no allocation set.
self.assertTrue(
self.cli._has_request_but_no_allocation(
@@ -2810,7 +2817,10 @@ class TestNovaManagePlacement(test.NoDBTestCase):
]
},
'binding:profile': {}
}))
},
mock.sentinel.neutron,
)
)
# True because there is a full resource_request but no allocation set.
self.assertTrue(
self.cli._has_request_but_no_allocation(
@@ -2825,73 +2835,89 @@ class TestNovaManagePlacement(test.NoDBTestCase):
]
},
'binding:profile': None,
}))
# False because there are no resources in the resource_request.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.empty_resources,
'resource_request': {
'resources': {},
'required': [
'CUSTOM_VNIC_TYPE_NORMAL'
]
},
'binding:profile': {}
}))
# False because there are no resources in the resource_request.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.missing_resources,
'resource_request': {
'required': [
'CUSTOM_VNIC_TYPE_NORMAL'
]
},
'binding:profile': {}
}))
# False because there are no required traits in the resource_request.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.empty_required,
'resource_request': {
'resources': {
'NET_BW_EGR_KILOBIT_PER_SEC': 1000,
},
'required': []
},
'binding:profile': {}
}))
# False because there are no required traits in the resource_request.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.missing_required,
'resource_request': {
'resources': {
'NET_BW_EGR_KILOBIT_PER_SEC': 1000,
},
},
'binding:profile': {}
}))
# False because there are no resources or required traits in the
# resource_request.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.empty_resource_request,
'resource_request': {},
'binding:profile': {}
}))
},
mock.sentinel.neutron,
)
)
# False because there is no resource_request.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.missing_resource_request,
'binding:profile': {}
}))
},
mock.sentinel.neutron,
)
)
@mock.patch(
'nova.network.neutron.API.has_extended_resource_request_extension',
new=mock.Mock(return_value=True),
)
def test_has_request_but_no_allocation_extended(self):
# False because there is resource_request and allocation set.
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.healed,
'resource_request': {
'request_groups': [
{
'id': uuidsentinel.group1,
'resources': {
'NET_BW_EGR_KILOBIT_PER_SEC': 1000,
},
'required': [
'CUSTOM_VNIC_TYPE_NORMAL'
]
},
],
},
'binding:profile': {
'allocation': {uuidsentinel.group1: uuidsentinel.rp1}
}
},
mock.sentinel.neutron,
)
)
# False because there is no resource_request
self.assertFalse(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.healed,
'resource_request': None,
'binding:profile': {
'allocation': {uuidsentinel.group1: uuidsentinel.rp1}
}
},
mock.sentinel.neutron,
)
)
# True because we have a request but no allocation set
self.assertTrue(
self.cli._has_request_but_no_allocation(
{
'id': uuidsentinel.healed,
'resource_request': {
'request_groups': [
{
'id': uuidsentinel.group1,
'resources': {
'NET_BW_EGR_KILOBIT_PER_SEC': 1000,
},
'required': [
'CUSTOM_VNIC_TYPE_NORMAL'
]
},
],
},
'binding:profile': {
'allocation': {}
}
},
mock.sentinel.neutron,
)
)
def test_update_ports_only_updates_binding_profile(self):
"""Simple test to make sure that only the port's binding:profile is