From 33644fbc8ebbca43d4d706d38501314a07ae5e66 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Mon, 25 Feb 2019 17:55:48 +0100
Subject: [PATCH] Fup for the bandwidth series

This patch fixes comments in the following patches:

* Ie5ea45484a632da77c41bfe2613fa5f1dfbf19ea
* If8a13f74d2b3c99f05365eb49dcfa01d9042fefa
* Ia2e6a65690a1d7f89af0bc32fa8be30ca09472e2
* Ic28ffe9268cb7a94214b029af66ccf9af40a3a78

Change-Id: Ic11d4b002d21f1ddd321b452502e05b152f0ce09
blueprint: bandwidth-resource-provider
---
 nova/compute/manager.py                     | 33 +++++---
 nova/conductor/manager.py                   |  3 +-
 nova/network/base_api.py                    |  4 +-
 nova/network/neutronv2/api.py               | 27 ++++---
 nova/tests/fixtures.py                      |  4 +
 nova/tests/functional/test_compute_mgr.py   |  1 -
 nova/tests/functional/test_servers.py       | 88 ++++++---------------
 nova/tests/unit/compute/test_compute_mgr.py | 20 ++---
 nova/virt/fake.py                           | 58 ++++++++++++++
 9 files changed, 138 insertions(+), 100 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index b9770acc1f04..8b6113beed80 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2093,6 +2093,28 @@ class ComputeManager(manager.Manager):
         hints = filter_properties.get('scheduler_hints') or {}
         return hints
 
+    @staticmethod
+    def _get_request_group_mapping(request_spec):
+        """Return the request group - resource provider mapping. This is
+        currently used for Neutron ports that have a resource request due
+        to a QoS minimum bandwidth policy rule attached to the port.
+
+        :param request_spec: A RequestSpec object
+        :returns: A dict keyed by RequestGroup requester_id, currently the
+            Neutron port_id, mapped to the list of resource provider UUIDs
+            that provide resources for that RequestGroup.
+        """
+
+        if (request_spec
+                and 'requested_resources' in request_spec
+                and request_spec.requested_resources is not None):
+            return {
+                group.requester_id: group.provider_uuids
+                for group in request_spec.requested_resources
+            }
+        else:
+            return None
+
     def _build_and_run_instance(self, context, instance, image, injected_files,
             admin_password, requested_networks, security_groups,
             block_device_mapping, node, limits, filter_properties,
@@ -2125,15 +2147,8 @@ class ComputeManager(manager.Manager):
                 scheduler_hints)
         image_meta = objects.ImageMeta.from_dict(image)
 
-        if (request_spec
-                and 'requested_resources' in request_spec
-                and request_spec.requested_resources is not None):
-            request_group_resource_providers_mapping = {
-                group.requester_id: group.provider_uuids
-                for group in request_spec.requested_resources
-            }
-        else:
-            request_group_resource_providers_mapping = None
+        request_group_resource_providers_mapping = \
+            self._get_request_group_mapping(request_spec)
 
         with self._build_resources(context, instance,
                 requested_networks, security_groups, image_meta,
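The new helper is easiest to read from its output. A minimal stand-alone sketch, with RequestGroup stood in by a namedtuple (the real one is a Nova versioned object) and made-up UUIDs:

    import collections

    # stand-in for nova.objects.RequestGroup
    RequestGroup = collections.namedtuple(
        'RequestGroup', ['requester_id', 'provider_uuids'])

    groups = [
        RequestGroup('port-uuid-1', ['rp-uuid-a']),
        RequestGroup('port-uuid-2', ['rp-uuid-b']),
    ]

    # the helper's dict comprehension, applied to the two sample groups
    mapping = {group.requester_id: group.provider_uuids for group in groups}
    assert mapping == {'port-uuid-1': ['rp-uuid-a'],
                       'port-uuid-2': ['rp-uuid-b']}

Extracting the comprehension into a static method lets the conductor and the compute manager share one definition of the mapping instead of duplicating the field-set checks.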
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index e6f63ea5acee..c73a4d6966a5 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -675,6 +675,7 @@ class ComputeTaskManager(base.Base):
                         # NOTE(gibi): redo the request group - resource
                         # provider mapping as the above claim call moves
                         # the allocation of the instance to another host
+                        # TODO(gibi): handle when the call below raises
                         self._fill_provider_mapping(
                             context, instance.uuid, request_spec)
                     else:
@@ -717,7 +718,7 @@
 
         # NOTE(gibi): at this point the request spec already got converted
         # to a legacy dict and then back to an object so we lost the non
-        # legacy part of the spec. Re-populate the requested_resource
+        # legacy part of the spec. Re-populate the requested_resources
         # field based on the original request spec object passed to this
         # function.
         if request_spec:
diff --git a/nova/network/base_api.py b/nova/network/base_api.py
index 861b5d59db2e..e4d38bd9b14b 100644
--- a/nova/network/base_api.py
+++ b/nova/network/base_api.py
@@ -203,9 +203,9 @@ class NetworkAPI(base.Base):
         :param attach: Boolean indicating if a port is being attached to an
             existing running instance. Should be False during server create.
         :param resource_provider_mapping: a dict keyed by ids of the entities
-            (for example Neutron port) requested resources for this instance
+            (for example Neutron port) requesting resources for this instance
             mapped to a list of resource provider UUIDs that are fulfilling
-            such resource request.
+            such a resource request.
         :returns: network info as from get_instance_nw_info() below
         """
         raise NotImplementedError()
diff --git a/nova/network/neutronv2/api.py b/nova/network/neutronv2/api.py
index 8548fec99c6f..7e6b03da5785 100644
--- a/nova/network/neutronv2/api.py
+++ b/nova/network/neutronv2/api.py
@@ -57,6 +57,7 @@
 BINDING_PROFILE = 'binding:profile'
 BINDING_HOST_ID = 'binding:host_id'
 MIGRATING_ATTR = 'migrating_to'
 L3_NETWORK_TYPES = ['vxlan', 'gre', 'geneve']
+ALLOCATION = 'allocation'
 
 def reset_state():
@@ -671,13 +672,9 @@ class API(base_api.NetworkAPI):
             # NOTE: We're doing this to remove the binding information
             # for the physical device but don't want to overwrite the other
             # information in the binding profile.
-            for profile_key in ('pci_vendor_info', 'pci_slot'):
+            for profile_key in ('pci_vendor_info', 'pci_slot', ALLOCATION):
                 if profile_key in port_profile:
                     del port_profile[profile_key]
-            # NOTE(gibi): remove information about the resource allocation
-            # of this port
-            if 'allocation' in port_profile:
-                del port_profile['allocation']
             port_req_body['port'][BINDING_PROFILE] = port_profile
 
             # NOTE: For internal DNS integration (network does not have a
@@ -1041,9 +1038,9 @@ class API(base_api.NetworkAPI):
         :param attach: Boolean indicating if a port is being attached to an
             existing running instance. Should be False during server create.
         :param resource_provider_mapping: a dict keyed by ids of the entities
-            (for example Neutron port) requested resources for this instance
+            (for example Neutron port) requesting resources for this instance
             mapped to a list of resource provider UUIDs that are fulfilling
-            such resource request.
+            such a resource request.
         :returns: network info as from get_instance_nw_info()
         """
         LOG.debug('allocate_for_instance()', instance=instance)
@@ -1085,14 +1082,14 @@ class API(base_api.NetworkAPI):
         for port in requested_ports_dict.values():
             # only communicate the allocations if the port has resource
             # requests
-            if port.get('resource_request', None):
+            if port.get('resource_request'):
                 profile = port.get(BINDING_PROFILE, {})
                 # NOTE(gibi): In the resource provider mapping there can be
                 # more than one RP fulfilling a request group. But resource
                 # requests of a Neutron port is always mapped to a
                 # numbered request group that is always fulfilled by one
                 # resource provider. So we only pass that single RP UUID here.
-                profile['allocation'] = resource_provider_mapping[
+                profile[ALLOCATION] = resource_provider_mapping[
                     port['id']][0]
                 port[BINDING_PROFILE] = profile
 
@@ -1104,10 +1101,6 @@ class API(base_api.NetworkAPI):
         # pre-existing port so one wasn't created here. The ports will be
         # updated later in _update_ports_for_instance to be bound to the
         # instance and compute host.
-        # TODO(gibi): if a port created here has resource request then we have
-        # to abort as that resource request was not considered during the
-        # scheduling of the instance and therefore there is no allocation in
-        # placement for that port.
         requests_and_created_ports = self._create_ports_for_instance(
             context, instance, ordered_networks, nets, neutron,
             security_group_ids)
@@ -1365,6 +1358,10 @@ class API(base_api.NetworkAPI):
             binding['profile'] = profile
 
         data = dict(binding=binding)
+        # TODO(gibi): To support ports with resource request during server
+        # live migrate operation we need to take care of the 'allocation'
+        # key in the binding profile per binding.
+
         bindings_by_port_id = {}
         for port_id in port_ids:
             resp = client.post('/v2.0/ports/%s/bindings' % port_id,
@@ -3231,6 +3228,10 @@ class API(base_api.NetworkAPI):
             # If the host hasn't changed, like in the case of resizing to the
             # same host, there is nothing to do.
             if p.get(BINDING_HOST_ID) != host:
+                # TODO(gibi): To support ports with resource request during
+                # server move operations we need to take care of the
+                # 'allocation' key in the binding profile per binding.
+
                 updates[BINDING_HOST_ID] = host
                 # If the host changed, the AZ could have also changed so we
                 # need to update the device_owner.
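The two sides of the binding profile handling that the ALLOCATION constant now centralizes can be sketched with plain dicts; the port ID, provider UUID and resource class below are illustrative, not real Neutron data:

    ALLOCATION = 'allocation'

    resource_provider_mapping = {'port-uuid-1': ['rp-uuid-a']}
    port = {
        'id': 'port-uuid-1',
        'resource_request': {
            'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000}},
        'binding:profile': {},
    }

    # binding: only ports with a resource request get the key, and only the
    # single RP UUID of the port's numbered request group is passed
    if port.get('resource_request'):
        profile = port.get('binding:profile', {})
        profile[ALLOCATION] = resource_provider_mapping[port['id']][0]
        port['binding:profile'] = profile

    # unbinding: the key is stripped together with the PCI binding keys
    for profile_key in ('pci_vendor_info', 'pci_slot', ALLOCATION):
        port['binding:profile'].pop(profile_key, None)

    assert ALLOCATION not in port['binding:profile']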
diff --git a/nova/tests/fixtures.py b/nova/tests/fixtures.py
index 9ccc03e3a7af..cd95d436d74c 100644
--- a/nova/tests/fixtures.py
+++ b/nova/tests/fixtures.py
@@ -1389,10 +1389,14 @@ class NeutronFixture(fixtures.Fixture):
         # The fixture allows port update so we need to deepcopy the class
         # variables to avoid test case interference.
         self._ports = {
+            # NOTE(gibi): The port_with_sriov_resource_request cannot be added
+            # globally in this fixture as it adds a second network that makes
+            # auto allocation based tests fail due to ambiguous networks.
             NeutronFixture.port_1['id']: copy.deepcopy(NeutronFixture.port_1),
             NeutronFixture.port_with_resource_request['id']:
                 copy.deepcopy(NeutronFixture.port_with_resource_request)
         }
+
         # The fixture does not allow network update so we don't have to
         # deepcopy here
         self._networks = {
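Why the deepcopy matters can be shown in isolation; this is plain Python with made-up names, not the real NeutronFixture:

    import copy

    class PortFixture(object):
        # shared class-level template; mutating it would leak across tests
        port_1 = {'id': 'port-uuid-1', 'binding:profile': {}}

        def setUp(self):
            # per-instance deepcopy: port updates stay local to one test
            self._ports = {
                PortFixture.port_1['id']: copy.deepcopy(PortFixture.port_1)}

    first, second = PortFixture(), PortFixture()
    first.setUp()
    second.setUp()
    first._ports['port-uuid-1']['binding:profile']['allocation'] = 'rp-a'
    assert PortFixture.port_1['binding:profile'] == {}
    assert second._ports['port-uuid-1']['binding:profile'] == {}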
diff --git a/nova/tests/functional/test_compute_mgr.py b/nova/tests/functional/test_compute_mgr.py
index 3cfc469ad494..75a6e9f5970a 100644
--- a/nova/tests/functional/test_compute_mgr.py
+++ b/nova/tests/functional/test_compute_mgr.py
@@ -72,7 +72,6 @@ class ComputeManagerTestCase(test.TestCase):
                              'instance_type': flavor,
                              'image': None},
             filter_properties)
-        request_spec.requested_resources = []
         self.compute.manager.build_and_run_instance(
             self.context, instance, {}, request_spec,
             filter_properties, block_device_mapping=[])
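The removed line was only needed while _build_and_run_instance read requested_resources directly; the new helper treats an unset field like None. A sketch with a stand-in spec object (the real RequestSpec is a versioned object whose `in` operator tests whether a field is set):

    class FakeSpec(object):
        # behaves like a RequestSpec with no field set
        def __contains__(self, name):
            return False

    def get_mapping(spec):
        # same conditional as _get_request_group_mapping()
        if (spec and 'requested_resources' in spec
                and spec.requested_resources is not None):
            return {group.requester_id: group.provider_uuids
                    for group in spec.requested_resources}
        return None

    assert get_mapping(FakeSpec()) is None  # unset field -> no mapping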
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index 96f630cac17a..8548e60c864f 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -23,6 +23,7 @@ import zlib
 
 from keystoneauth1 import adapter
 import mock
+from oslo_config import cfg
 from oslo_log import log as logging
 from oslo_serialization import base64
 from oslo_serialization import jsonutils
@@ -53,6 +54,7 @@ from nova.tests.unit.objects import test_instance_info_cache
 from nova.virt import fake
 from nova import volume
 
+CONF = cfg.CONF
 LOG = logging.getLogger(__name__)
 
 
@@ -5701,67 +5703,15 @@ class PortResourceRequestBasedSchedulingTestBase(
     def setUp(self):
         # enable PciPassthroughFilter to support SRIOV before the base class
         # starts the scheduler
-        self.flags(enabled_filters=[
-            "RetryFilter",
-            "AvailabilityZoneFilter",
-            "ComputeFilter",
-            "ComputeCapabilitiesFilter",
-            "ImagePropertiesFilter",
-            "ServerGroupAntiAffinityFilter",
-            "ServerGroupAffinityFilter",
-            "PciPassthroughFilter",
-        ],
-            group='filter_scheduler')
+        if 'PciPassthroughFilter' not in CONF.filter_scheduler.enabled_filters:
+            self.flags(
+                enabled_filters=CONF.filter_scheduler.enabled_filters +
+                ['PciPassthroughFilter'],
+                group='filter_scheduler')
 
-        # Set passthrough_whitelist before the base class starts the compute
-        # node to match with the PCI devices reported by the
-        # FakeDriverWithPciResources.
-
-        # NOTE(gibi): 0000:01:00 is tagged to physnet1 and therefore not a
-        # match based on physnet to our sriov port
-        # 'port_with_sriov_resource_request' as the network of that port points
-        # to physnet2 with the attribute 'provider:physical_network'. Nova pci
-        # handling already enforce this rule.
-        #
-        # 0000:02:00 and 0000:03:00 are both tagged to physnet2 and therefore
-        # a good match for our sriov port based on physnet. Having two PFs on
-        # the same physnet will allows us to test the placement allocation -
-        # physical allocation matching based on the bandwidth allocation
-        # in the future.
-        self.flags(passthrough_whitelist=
-                   [
-                       jsonutils.dumps(
-                           {
-                               "address": {
-                                   "domain": "0000",
-                                   "bus": "01",
-                                   "slot": "00",
-                                   "function": ".*"},
-                               "physical_network": "physnet1",
-                           }
-                       ),
-                       jsonutils.dumps(
-                           {
-                               "address": {
-                                   "domain": "0000",
-                                   "bus": "02",
-                                   "slot": "00",
-                                   "function": ".*"},
-                               "physical_network": "physnet2",
-                           }
-                       ),
-                       jsonutils.dumps(
-                           {
-                               "address": {
-                                   "domain": "0000",
-                                   "bus": "03",
-                                   "slot": "00",
-                                   "function": ".*"},
-                               "physical_network": "physnet2",
-                           }
-                       ),
-                   ],
-                   group='pci')
+        self.useFixture(
+            fake.FakeDriverWithPciResources.
+            FakeDriverWithPciResourcesConfigFixture())
 
         super(PortResourceRequestBasedSchedulingTestBase, self).setUp()
         self.compute1 = self._start_compute('host1')
@@ -5769,6 +5719,11 @@ class PortResourceRequestBasedSchedulingTestBase(
         self.ovs_bridge_rp_per_host = {}
         self.flavor = self.api.get_flavors()[0]
         self.flavor_with_group_policy = self.api.get_flavors()[1]
+
+        # Setting the group policy for placement. This is mandatory when more
+        # than one request group is included in the allocation candidate
+        # request and we have tests with two ports both having resource
+        # requests modelled as two separate request groups.
         self.admin_api.post_extra_spec(
             self.flavor_with_group_policy['id'],
             {'extra_specs': {'group_policy': 'isolate'}})
@@ -5842,6 +5797,10 @@ class PortResourceRequestBasedSchedulingTestBase(
     def _create_pf_device_rp(
             self, device_rp_uuid, parent_rp_uuid, inventories, traits,
             device_rp_name=None):
+        """Create a RP in placement for a physical function network device
+        with traits and inventories.
+        """
+
         if not device_rp_name:
             device_rp_name = device_rp_uuid
 
@@ -5877,9 +5836,10 @@ class PortResourceRequestBasedSchedulingTestBase(
         # * PF3 represents the PCI device 0000:03:00 and, it will be mapped to
         #   physnet2 but it will not have bandwidth inventory.
 
+        compute_name = compute_rp_uuid
         sriov_agent_rp_uuid = getattr(uuids, compute_rp_uuid + 'sriov agent')
         agent_rp_req = {
-            "name": "compute0:NIC Switch agent",
+            "name": "%s:NIC Switch agent" % compute_name,
             "uuid": sriov_agent_rp_uuid,
             "parent_provider_uuid": compute_rp_uuid
         }
@@ -5897,7 +5857,7 @@ class PortResourceRequestBasedSchedulingTestBase(
         traits = [self.CUSTOM_VNIC_TYPE_DIRECT, self.CUSTOM_PHYSNET1]
         self._create_pf_device_rp(
             self.sriov_pf1_rp_uuid, sriov_agent_rp_uuid, inventories, traits,
-            device_rp_name="compute0:NIC Switch agent:ens1")
+            device_rp_name="%s:NIC Switch agent:ens1" % compute_name)
 
         self.sriov_pf2_rp_uuid = getattr(uuids, sriov_agent_rp_uuid + 'PF2')
         inventories = {
@@ -5909,14 +5869,14 @@ class PortResourceRequestBasedSchedulingTestBase(
         traits = [self.CUSTOM_VNIC_TYPE_DIRECT, self.CUSTOM_PHYSNET2]
         self._create_pf_device_rp(
             self.sriov_pf2_rp_uuid, sriov_agent_rp_uuid, inventories, traits,
-            device_rp_name="compute0:NIC Switch agent:ens2")
+            device_rp_name="%s:NIC Switch agent:ens2" % compute_name)
 
         self.sriov_pf3_rp_uuid = getattr(uuids, sriov_agent_rp_uuid + 'PF3')
         inventories = {}
         traits = [self.CUSTOM_VNIC_TYPE_DIRECT, self.CUSTOM_PHYSNET2]
         self._create_pf_device_rp(
             self.sriov_pf3_rp_uuid, sriov_agent_rp_uuid, inventories, traits,
-            device_rp_name="compute0:NIC Switch agent:ens3")
+            device_rp_name="%s:NIC Switch agent:ens3" % compute_name)
 
     def _create_networking_rp_tree(self, compute_rp_uuid):
         # let's simulate what the neutron would do
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 28772c53be16..e78c1b8672f1 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -6606,17 +6606,17 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
                 requester_id=uuids.port1,
                 provider_uuids=[uuids.rp1])])
 
         with test.nested(
-            mock.patch.object(self.compute.driver, 'spawn'),
-            mock.patch.object(self.compute,
-                '_build_networks_for_instance', return_value=[]),
-            mock.patch.object(self.instance, 'save'),
+                mock.patch.object(self.compute.driver, 'spawn'),
+                mock.patch.object(
+                    self.compute, '_build_networks_for_instance', return_value=[]),
+                mock.patch.object(self.instance, 'save'),
         ) as (mock_spawn, mock_networks, mock_save):
-                self.compute._build_and_run_instance(
-                    self.context,
-                    self.instance, self.image, self.injected_files,
-                    self.admin_pass, self.requested_networks,
-                    self.security_groups, self.block_device_mapping, self.node,
-                    self.limits, self.filter_properties, request_spec)
+            self.compute._build_and_run_instance(
+                self.context,
+                self.instance, self.image, self.injected_files,
+                self.admin_pass, self.requested_networks,
+                self.security_groups, self.block_device_mapping, self.node,
+                self.limits, self.filter_properties, request_spec)
 
         mock_networks.assert_called_once_with(
             self.context, self.instance, self.requested_networks,
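The test.nested() helper used above enters the patchers in order and yields the mocks as a tuple; with the stdlib it is roughly equivalent to this (illustrative, not Nova code):

    import contextlib
    from unittest import mock

    class Compute(object):
        def spawn(self):
            raise RuntimeError('the real spawn must not run in a test')

    compute = Compute()
    with contextlib.ExitStack() as stack:
        # each enter_context() mirrors one mock.patch.object() argument of
        # test.nested(); the mocks come back in the same order
        mock_spawn = stack.enter_context(mock.patch.object(compute, 'spawn'))
        compute.spawn()  # intercepted by the mock, no RuntimeError
    mock_spawn.assert_called_once_with()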
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index c1f04c5dcd1d..c74410b53df7 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -28,6 +28,7 @@ import contextlib
 import copy
 import time
 
+import fixtures
 import os_resource_classes as orc
 from oslo_log import log as logging
 from oslo_serialization import jsonutils
@@ -836,6 +837,63 @@ class FakeLiveMigrateDriverWithNestedCustomResources(
 
 
 class FakeDriverWithPciResources(SmallFakeDriver):
+
+    # NOTE(gibi): Always use this fixture along with the
+    # FakeDriverWithPciResources to make the necessary configuration for the
+    # driver.
+    class FakeDriverWithPciResourcesConfigFixture(fixtures.Fixture):
+        def setUp(self):
+            super(FakeDriverWithPciResources.
+                  FakeDriverWithPciResourcesConfigFixture, self).setUp()
+            # Set passthrough_whitelist before the compute node starts to
+            # match the PCI devices reported by this fake driver.
+
+            # NOTE(gibi): 0000:01:00 is tagged to physnet1 and therefore not a
+            # match based on physnet to our sriov port
+            # 'port_with_sriov_resource_request' as the network of that port
+            # points to physnet2 with the attribute
+            # 'provider:physical_network'. Nova pci handling already enforces
+            # this rule.
+            #
+            # 0000:02:00 and 0000:03:00 are both tagged to physnet2 and
+            # therefore a good match for our sriov port based on physnet.
+            # Having two PFs on the same physnet will allow us to test the
+            # placement allocation - physical allocation matching based on the
+            # bandwidth allocation in the future.
+            CONF.set_override('passthrough_whitelist', override=[
+                jsonutils.dumps(
+                    {
+                        "address": {
+                            "domain": "0000",
+                            "bus": "01",
+                            "slot": "00",
+                            "function": ".*"},
+                        "physical_network": "physnet1",
+                    }
+                ),
+                jsonutils.dumps(
+                    {
+                        "address": {
+                            "domain": "0000",
+                            "bus": "02",
+                            "slot": "00",
+                            "function": ".*"},
+                        "physical_network": "physnet2",
+                    }
+                ),
+                jsonutils.dumps(
+                    {
+                        "address": {
+                            "domain": "0000",
+                            "bus": "03",
+                            "slot": "00",
+                            "function": ".*"},
+                        "physical_network": "physnet2",
+                    }
+                ),
+            ],
+                group='pci')
+
     def get_available_resource(self, nodename):
         host_status = super(
             FakeDriverWithPciResources, self).get_available_resource(nodename)
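A hypothetical test case pairing the driver with its new config fixture, per the NOTE above; everything except the driver and fixture names is made up:

    from nova import test
    from nova.virt import fake

    class MyPciTest(test.TestCase):
        # made-up test case; the driver/fixture pairing mirrors
        # PortResourceRequestBasedSchedulingTestBase.setUp() above
        compute_driver = 'fake.FakeDriverWithPciResources'

        def setUp(self):
            # the config fixture must be installed before the base setUp()
            # starts the compute service that loads the fake driver
            self.useFixture(
                fake.FakeDriverWithPciResources.
                FakeDriverWithPciResourcesConfigFixture())
            super(MyPciTest, self).setUp()

Moving the whitelist setup into the driver's own fixture keeps the PCI addresses and the physnet tags next to the device list the fake driver reports, so the two cannot drift apart across test modules.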