diff --git a/neutron/plugins/ml2/driver_context.py b/neutron/plugins/ml2/driver_context.py index c61982729e3..4e4afaf5bd8 100644 --- a/neutron/plugins/ml2/driver_context.py +++ b/neutron/plugins/ml2/driver_context.py @@ -304,6 +304,16 @@ class PortContext(MechanismDriverContext, api.PortContext): self._binding.vif_details = jsonutils.dumps(vif_details) self._new_port_status = status + def _unset_binding(self): + '''Undo a previous call to set_binding() before it gets committed. + + This method is for MechanismManager and is not part of the driver API. + ''' + self._new_bound_segment = None + self._binding.vif_type = portbindings.VIF_TYPE_UNBOUND + self._binding.vif_details = '' + self._new_port_status = None + def continue_binding(self, segment_id, next_segments_to_bind): # TODO(rkukura) Verify binding allowed, segment in network self._new_bound_segment = segment_id diff --git a/neutron/plugins/ml2/drivers/mech_agent.py b/neutron/plugins/ml2/drivers/mech_agent.py index ee1fa12e8e3..5b41cdfea79 100644 --- a/neutron/plugins/ml2/drivers/mech_agent.py +++ b/neutron/plugins/ml2/drivers/mech_agent.py @@ -14,10 +14,12 @@ # under the License. import abc +import uuid from neutron_lib.api.definitions import portbindings from neutron_lib.callbacks import resources from neutron_lib import constants as const +from neutron_lib.placement import utils as place_utils from neutron_lib.plugins.ml2 import api from oslo_log import log import six @@ -161,6 +163,73 @@ class AgentMechanismDriverBase(api.MechanismDriver): "%s mechanism driver!") % self.agent_type) return supported_vnic_types + def _possible_agents_for_port(self, context): + agent_filters = { + 'host': [context.current['binding:host_id']], + 'agent_type': [self.agent_type], + 'admin_state_up': [True], + # By not filtering for 'alive' we may report being responsible + # and still not being able to handle the binding. But that case + # will be properly logged and handled very soon. 
That is when + trying to bind with a dead agent. + } + return context._plugin.get_agents( + context._plugin_context, + filters=agent_filters, + ) + + def responsible_for_ports_allocation(self, context): + """Report if an agent is responsible for a resource provider. + + :param context: PortContext instance describing the port + :returns: True for responsible, False for not responsible + + An agent based mechanism driver is responsible for a resource provider + if an agent of it is responsible for that resource provider. An agent + reports responsibility by including the resource provider in the + configurations field of the agent heartbeat. + """ + uuid_ns = self.resource_provider_uuid5_namespace + if uuid_ns is None: + return False + if 'allocation' not in context.current['binding:profile']: + return False + + rp = uuid.UUID(context.current['binding:profile']['allocation']) + host_agents = self._possible_agents_for_port(context) + + reported = {} + for agent in host_agents: + if 'resource_provider_bandwidths' in agent['configurations']: + for device in agent['configurations'][ + 'resource_provider_bandwidths'].keys(): + device_rp_uuid = place_utils.device_resource_provider_uuid( + namespace=uuid_ns, + host=agent['host'], + device=device) + if device_rp_uuid == rp: + reported[agent['id']] = agent + + if len(reported) == 1: + agent = list(reported.values())[0] + LOG.debug("Agent %(agent)s of type %(agent_type)s reports to be " + "responsible for resource provider %(rsc_provider)s", + {'agent': agent['id'], + 'agent_type': agent['agent_type'], + 'rsc_provider': rp}) + return True + elif len(reported) > 1: + LOG.error("Agent misconfiguration, multiple agents on the same " + "host %(host)s reports being responsible for resource " + "provider %(rsc_provider)s: %(agents)s", + {'host': context.current['binding:host_id'], + 'rsc_provider': rp, + 'agents': reported.keys()}) + return False + else: + # not responsible, must be somebody else + return False + 
@six.add_metaclass(abc.ABCMeta) class SimpleAgentMechanismDriverBase(AgentMechanismDriverBase): diff --git a/neutron/plugins/ml2/managers.py b/neutron/plugins/ml2/managers.py index 5ca415d735c..1e8fd01f012 100644 --- a/neutron/plugins/ml2/managers.py +++ b/neutron/plugins/ml2/managers.py @@ -22,6 +22,7 @@ from neutron_lib import constants from neutron_lib.db import api as db_api from neutron_lib import exceptions as exc from neutron_lib.exceptions import multiprovidernet as mpnet_exc +from neutron_lib.exceptions import placement as place_exc from neutron_lib.exceptions import vlantransparent as vlan_exc from neutron_lib.plugins.ml2 import api from oslo_config import cfg @@ -757,12 +758,18 @@ class MechanismManager(stevedore.named.NamedExtensionManager): 'vnic_type': binding.vnic_type, 'segments': context.network.network_segments}) - def _bind_port_level(self, context, level, segments_to_bind): + def _bind_port_level(self, context, level, segments_to_bind, + drivers=None, redoing_bottom=False): + if drivers is None: + drivers = self.ordered_mech_drivers + binding = context._binding port_id = context.current['id'] - LOG.debug("Attempting to bind port %(port)s on host %(host)s " - "at level %(level)s using segments %(segments)s", + LOG.debug("Attempting to bind port %(port)s by drivers %(drivers)s " + "on host %(host)s at level %(level)s using " + "segments %(segments)s", {'port': port_id, + 'drivers': ','.join([driver.name for driver in drivers]), 'host': context.host, 'level': level, 'segments': segments_to_bind}) @@ -774,7 +781,7 @@ class MechanismManager(stevedore.named.NamedExtensionManager): 'host': context.host}) return False - for driver in self.ordered_mech_drivers: + for driver in drivers: if not self._check_driver_to_bind(driver, segments_to_bind, context._binding_levels): continue @@ -806,6 +813,61 @@ class MechanismManager(stevedore.named.NamedExtensionManager): 'lvl': level + 1}) context._pop_binding_level() else: + # NOTE(bence romsics): Consider: "In 
case of + # hierarchical port binding binding_profile.allocation + # [decided and sent by Placement and Nova] + # is meant to drive the binding only on the binding + # level that represents the closest physical interface + # to the nova server." Link to spec: + # + # https://review.openstack.org/#/c/508149/14/specs\ + # /rocky/minimum-bandwidth-\ + # allocation-placement-api.rst@582 + # + # But we cannot tell if a binding level is + # the bottom binding level before set_binding() + # gets called, and that's already too late. So we + # must undo the last binding after set_binding() + # was called and redo the last level trying to + # bind only with one driver as inferred from + # the allocation. In order to undo the binding + # here we must also assume that each driver's + # bind_port() implementation is side effect free + # beyond calling set_binding(). + # + # Also please note that technically we allow for + # a redo to call continue_binding() instead of + # set_binding() and by that turn what was supposed + # to be the bottom level into a non-bottom binding + # level. A thorough discussion is recommended if + # you think of taking advantage of this. + # + # Also if we find use cases requiring + # diamond-shaped selections of drivers on different + # levels (eg. driverA and driverB can be both + # a valid choice on level 0, but on level 1 both + # previous choice leads to driverC) then we need + # to restrict segment selection too based on + # traits of the allocated resource provider on + # the top binding_level (==0). 
+ if (context.current['binding:profile'] is not None and + 'allocation' in context.current[ + 'binding:profile'] and + not redoing_bottom): + LOG.debug( + "Undo bottom bound level and redo it " + "according to binding_profile.allocation, " + "resource provider uuid: %s", + context.current[ + 'binding:profile']['allocation']) + context._pop_binding_level() + context._unset_binding() + return self._bind_port_level( + context, level, segments_to_bind, + drivers=[self._infer_driver_from_allocation( + context)], + redoing_bottom=True) + # Binding complete. LOG.debug("Bound port: %(port)s, " "host: %(host)s, " @@ -823,6 +885,59 @@ class MechanismManager(stevedore.named.NamedExtensionManager): "bind_port", driver.name) + def _infer_driver_from_allocation(self, context): + """Choose mechanism driver as implied by allocation in placement. + + :param context: PortContext instance describing the port + :returns: a single MechanismDriver instance + + Ports allocated to a resource provider (ie. a physical network + interface) in Placement have the UUID of the provider in their + binding:profile.allocation. The choice of a physical network + interface (as recorded in the allocation) implies a choice of + mechanism driver too. When an allocation was received we expect + exactly one mechanism driver to be responsible for that physical + network interface resource provider. 
+ """ + + drivers = [] + for driver in self.ordered_mech_drivers: + if driver.obj.responsible_for_ports_allocation(context): + drivers.append(driver) + + if len(drivers) == 0: + LOG.error("Failed to bind port %(port)s on host " + "%(host)s allocated on resource provider " + "%(rsc_provider)s, because no mechanism driver " + "reports being responsible", + {'port': context.current['id'], + 'host': context.host, + 'rsc_provider': context.current[ + 'binding:profile']['allocation']}) + raise place_exc.UnknownResourceProvider( + rsc_provider=context.current['binding:profile']['allocation']) + + if len(drivers) >= 2: + raise place_exc.AmbiguousResponsibilityForResourceProvider( + rsc_provider=context.current['binding:profile']['allocation'], + drivers=','.join([driver.name for driver in drivers])) + + # NOTE(bence romsics): The error conditions for raising either + # UnknownResourceProvider or AmbiguousResponsibilityForResourceProvider + # are pretty static therefore the usual 10-times-retry of a binding + # failure could easily be unnecessary in those cases. However at this + # point special handling of these exceptions in the binding retry loop + # seems like premature optimization to me since these exceptions are + # always a sign of a misconfigured neutron deployment. 
+ + LOG.debug("Restricting possible bindings of port %(port)s " + "(as inferred from placement allocation) to " + "mechanism driver '%(driver)s'", + {'port': context.current['id'], + 'driver': drivers[0].name}) + + return drivers[0] + def is_host_filtering_supported(self): return all(driver.obj.is_host_filtering_supported() for driver in self.ordered_mech_drivers) diff --git a/neutron/tests/unit/plugins/ml2/_test_mech_agent.py b/neutron/tests/unit/plugins/ml2/_test_mech_agent.py index 89398c29171..edc30ce9ced 100644 --- a/neutron/tests/unit/plugins/ml2/_test_mech_agent.py +++ b/neutron/tests/unit/plugins/ml2/_test_mech_agent.py @@ -246,6 +246,22 @@ class AgentMechanismGenericTestCase(AgentMechanismBaseTestCase): self.driver.bind_port(context) self._check_unbound(context) + def test_driver_not_responsible_for_ports_allocation(self): + agents = [ + {'configurations': {'rp_bandwidths': {'eth0': {}}}, + 'host': 'host'}, + ] + profile = {} + segments = [] + port_ctx = FakePortContext( + self.AGENT_TYPE, + agents, + segments, + vnic_type=portbindings.VNIC_DIRECT, + profile=profile) + self.assertFalse( + self.driver.responsible_for_ports_allocation(port_ctx)) + class AgentMechanismLocalTestCase(AgentMechanismBaseTestCase): LOCAL_SEGMENTS = [{api.ID: 'unknown_segment_id', diff --git a/neutron/tests/unit/plugins/ml2/drivers/mech_fake_agent.py b/neutron/tests/unit/plugins/ml2/drivers/mech_fake_agent.py index 41a5e32c75e..c71c0f2ff5c 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/mech_fake_agent.py +++ b/neutron/tests/unit/plugins/ml2/drivers/mech_fake_agent.py @@ -60,3 +60,7 @@ class FakeAgentMechanismDriver(mech_agent.SimpleAgentMechanismDriverBase): def get_mappings(self, agent): return dict(agent['configurations'].get('interface_mappings', {})) + + +class AnotherFakeAgentMechanismDriver(FakeAgentMechanismDriver): + pass diff --git a/neutron/tests/unit/plugins/ml2/drivers/mech_sriov/mech_driver/test_mech_sriov_nic_switch.py 
b/neutron/tests/unit/plugins/ml2/drivers/mech_sriov/mech_driver/test_mech_sriov_nic_switch.py index 8e084d7a701..35d0afecb78 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/mech_sriov/mech_driver/test_mech_sriov_nic_switch.py +++ b/neutron/tests/unit/plugins/ml2/drivers/mech_sriov/mech_driver/test_mech_sriov_nic_switch.py @@ -87,6 +87,28 @@ class SriovSwitchMechGenericTestCase(SriovNicSwitchMechanismBaseTestCase, segment = {api.NETWORK_TYPE: network_type} self.assertTrue(self.driver.check_segment_for_agent(segment)) + def test_driver_responsible_for_ports_allocation(self): + agents = [ + {'agent_type': 'NIC Switch agent', + 'configurations': {'resource_provider_bandwidths': {'eth0': {}}}, + 'host': 'host', + 'id': '1'} + ] + segments = [] + # uuid -v5 87f1895c-73bb-11e8-9008-c4d987b2a692 host:eth0 + profile = {'allocation': '5762cf50-781b-5f01-8ebc-0cce8c9e74cd'} + + port_ctx = base.FakePortContext( + self.AGENT_TYPE, + agents, + segments, + vnic_type=portbindings.VNIC_DIRECT, + profile=profile) + with mock.patch.object(self.driver, '_possible_agents_for_port', + return_value=agents): + self.assertTrue( + self.driver.responsible_for_ports_allocation(port_ctx)) + class SriovMechVlanTestCase(SriovNicSwitchMechanismBaseTestCase, base.AgentMechanismBaseTestCase): diff --git a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/mech_driver/test_mech_openvswitch.py b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/mech_driver/test_mech_openvswitch.py index b67783f175b..cf073b5471a 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/openvswitch/mech_driver/test_mech_openvswitch.py +++ b/neutron/tests/unit/plugins/ml2/drivers/openvswitch/mech_driver/test_mech_openvswitch.py @@ -166,7 +166,27 @@ class OpenvswitchMechanismHybridPlugTestCase(OpenvswitchMechanismBaseTestCase): class OpenvswitchMechanismGenericTestCase(OpenvswitchMechanismBaseTestCase, base.AgentMechanismGenericTestCase): - pass + def test_driver_responsible_for_ports_allocation(self): + agents = [ + 
{'agent_type': 'Open vSwitch agent', + 'configurations': {'resource_provider_bandwidths': {'eth0': {}}}, + 'id': '1', + 'host': 'host'} + ] + segments = [] + # uuid -v5 87ee7d5c-73bb-11e8-9008-c4d987b2a692 host:eth0 + profile = {'allocation': '13cc0ed9-e802-5eaa-b4c7-3441855e31f2'} + + port_ctx = base.FakePortContext( + self.AGENT_TYPE, + agents, + segments, + vnic_type=portbindings.VNIC_NORMAL, + profile=profile) + with mock.patch.object(self.driver, '_possible_agents_for_port', + return_value=agents): + self.assertTrue( + self.driver.responsible_for_ports_allocation(port_ctx)) class OpenvswitchMechanismLocalTestCase(OpenvswitchMechanismBaseTestCase, diff --git a/neutron/tests/unit/plugins/ml2/test_managers.py b/neutron/tests/unit/plugins/ml2/test_managers.py index 38e42981049..dfb53080286 100644 --- a/neutron/tests/unit/plugins/ml2/test_managers.py +++ b/neutron/tests/unit/plugins/ml2/test_managers.py @@ -16,6 +16,7 @@ import mock +from neutron_lib.exceptions import placement as place_exc from neutron_lib.plugins.ml2 import api from oslo_config import cfg from oslo_db import exception as db_exc @@ -68,6 +69,60 @@ class TestManagers(base.BaseTestCase): manager._bind_port_level(self.context, 0, self.segments_to_bind) self.assertEqual(0, bind_port.call_count) + def test__infer_driver_from_allocation_positive(self): + cfg.CONF.set_override( + 'mechanism_drivers', ['fake_agent'], group='ml2') + manager = managers.MechanismManager() + with mock.patch.object(mech_fake_agent.FakeAgentMechanismDriver, + 'responsible_for_ports_allocation', + return_value=True): + responsible_driver = manager._infer_driver_from_allocation( + FakePortContext( + None, + None, + self.segments_to_bind, + profile={'allocation': 'fake_resource_provider'})) + self.assertEqual(responsible_driver.name, 'fake_agent') + + def test__infer_driver_from_allocation_negative(self): + cfg.CONF.set_override( + 'mechanism_drivers', ['fake_agent'], group='ml2') + manager = managers.MechanismManager() + with 
mock.patch.object(mech_fake_agent.FakeAgentMechanismDriver, + 'responsible_for_ports_allocation', + return_value=False): + self.assertRaises( + place_exc.UnknownResourceProvider, + manager._infer_driver_from_allocation, + FakePortContext( + None, + None, + self.segments_to_bind, + profile={'allocation': 'fake_resource_provider'}) + ) + + def test__infer_driver_from_allocation_ambiguous(self): + cfg.CONF.set_override( + 'mechanism_drivers', + ['fake_agent', 'another_fake_agent'], + group='ml2') + manager = managers.MechanismManager() + with mock.patch.object(mech_fake_agent.FakeAgentMechanismDriver, + 'responsible_for_ports_allocation', + return_value=True), \ + mock.patch.object(mech_fake_agent.AnotherFakeAgentMechanismDriver, + 'responsible_for_ports_allocation', + return_value=True): + self.assertRaises( + place_exc.AmbiguousResponsibilityForResourceProvider, + manager._infer_driver_from_allocation, + FakePortContext( + None, + None, + self.segments_to_bind, + profile={'allocation': 'fake_resource_provider'}) + ) + @mock.patch.object(managers.LOG, 'critical') @mock.patch.object(managers.MechanismManager, '_driver_not_loaded') def test__driver_not_found(self, mock_not_loaded, mock_log): diff --git a/neutron/tests/unit/plugins/ml2/test_port_binding.py b/neutron/tests/unit/plugins/ml2/test_port_binding.py index 18e51c480d8..39fa94a3a0a 100644 --- a/neutron/tests/unit/plugins/ml2/test_port_binding.py +++ b/neutron/tests/unit/plugins/ml2/test_port_binding.py @@ -640,3 +640,20 @@ class ExtendedPortBindingTestCase(test_plugin.NeutronDbPluginV2TestCase): port, new_binding = self._create_port_and_binding() response = self._delete_port_binding(port['id'], 'other-host') self.assertEqual(webob.exc.HTTPNotFound.code, response.status_int) + + def test_binding_fail_for_unknown_allocation(self): + # The UUID is a random one - which of course is unknown to neutron + # as a resource provider UUID. 
+ profile = {'allocation': 'ccccbb4c-2adf-11e9-91bc-db7063775d06'} + kwargs = {pbe_ext.PROFILE: profile} + device_owner = '%s%s' % (const.DEVICE_OWNER_COMPUTE_PREFIX, 'nova') + + with self.port(device_owner=device_owner) as port: + port_id = port['port']['id'] + response = self._create_port_binding( + self.fmt, port_id, self.host, **kwargs) + + self.assertEqual(webob.exc.HTTPInternalServerError.code, + response.status_int) + self.assertTrue(exceptions.PortBindingError.__name__ in + response.text) diff --git a/setup.cfg b/setup.cfg index 5b0e3aa08ae..9c587c07578 100644 --- a/setup.cfg +++ b/setup.cfg @@ -92,6 +92,7 @@ neutron.ml2.mechanism_drivers = l2population = neutron.plugins.ml2.drivers.l2pop.mech_driver:L2populationMechanismDriver sriovnicswitch = neutron.plugins.ml2.drivers.mech_sriov.mech_driver.mech_driver:SriovNicSwitchMechanismDriver fake_agent = neutron.tests.unit.plugins.ml2.drivers.mech_fake_agent:FakeAgentMechanismDriver + another_fake_agent = neutron.tests.unit.plugins.ml2.drivers.mech_fake_agent:AnotherFakeAgentMechanismDriver faulty_agent = neutron.tests.unit.plugins.ml2.drivers.mech_faulty_agent:FaultyAgentMechanismDriver neutron.ml2.extension_drivers = test = neutron.tests.unit.plugins.ml2.drivers.ext_test:TestExtensionDriver