Drive binding by placement allocation
Drive the choice of mechanism driver during binding as inferred from the resource provider allocated by nova and as told to neutron via the port's binding:profile. As discussed in a neutron QoS IRC meeting some time ago, this patch introduces a new assumption on bind_port() implementations: an implementation of bind_port() in any mech driver supporting Guaranteed Minimum Bandwidth must not have non-idempotent side effects, because the last binding level will be redone a second time with a narrowed-down list of mechanism drivers. If the second call does not give the same result as the first, all kinds of weird things can happen. Change-Id: I2b7573ec6795170ce45a13d5d0ad7844fb85182d Depends-On: https://review.openstack.org/574781 Depends-On: https://review.openstack.org/635160 Partial-Bug: #1578989 See-Also: https://review.openstack.org/502306 (nova spec) See-Also: https://review.openstack.org/508149 (neutron spec)
This commit is contained in:
parent
fe73e8c9b3
commit
74c51a2e53
@ -304,6 +304,16 @@ class PortContext(MechanismDriverContext, api.PortContext):
|
||||
self._binding.vif_details = jsonutils.dumps(vif_details)
|
||||
self._new_port_status = status
|
||||
|
||||
def _unset_binding(self):
    '''Undo a previous call to set_binding() before it gets committed.

    This method is for MechanismManager and is not part of the driver API.
    '''
    # Reset everything set_binding() recorded so the same binding level
    # can be attempted again (e.g. when the bottom level is redone with a
    # driver inferred from a placement allocation).
    self._new_bound_segment = None
    self._binding.vif_type = portbindings.VIF_TYPE_UNBOUND
    # vif_details is persisted as a JSON string on the binding record;
    # an empty string means "no details".
    self._binding.vif_details = ''
    self._new_port_status = None
||||
def continue_binding(self, segment_id, next_segments_to_bind):
|
||||
# TODO(rkukura) Verify binding allowed, segment in network
|
||||
self._new_bound_segment = segment_id
|
||||
|
@ -14,10 +14,12 @@
|
||||
# under the License.
|
||||
|
||||
import abc
|
||||
import uuid
|
||||
|
||||
from neutron_lib.api.definitions import portbindings
|
||||
from neutron_lib.callbacks import resources
|
||||
from neutron_lib import constants as const
|
||||
from neutron_lib.placement import utils as place_utils
|
||||
from neutron_lib.plugins.ml2 import api
|
||||
from oslo_log import log
|
||||
import six
|
||||
@ -161,6 +163,73 @@ class AgentMechanismDriverBase(api.MechanismDriver):
|
||||
"%s mechanism driver!") % self.agent_type)
|
||||
return supported_vnic_types
|
||||
|
||||
def _possible_agents_for_port(self, context):
|
||||
agent_filters = {
|
||||
'host': [context.current['binding:host_id']],
|
||||
'agent_type': [self.agent_type],
|
||||
'admin_state_up': [True],
|
||||
# By not filtering for 'alive' we may report being responsible
|
||||
# and still not being able to handle the binding. But that case
|
||||
# will be properly logged and handled very soon. That is when
|
||||
# trying to bind with a dead agent.
|
||||
}
|
||||
return context._plugin.get_agents(
|
||||
context._plugin_context,
|
||||
filters=agent_filters,
|
||||
)
|
||||
|
||||
def responsible_for_ports_allocation(self, context):
    """Report if an agent is responsible for a resource provider.

    :param context: PortContext instance describing the port
    :returns: True for responsible, False for not responsible

    An agent based mechanism driver is responsible for a resource provider
    if an agent of it is responsible for that resource provider. An agent
    reports responsibility by including the resource provider in the
    configurations field of the agent heartbeat.
    """
    uuid_ns = self.resource_provider_uuid5_namespace
    if uuid_ns is None:
        # This driver never generates resource provider UUIDs, so it
        # cannot be responsible for any allocation.
        return False
    # binding:profile may be absent or None when no allocation was sent;
    # guard against both instead of assuming a dict is always present.
    profile = context.current.get('binding:profile') or {}
    if 'allocation' not in profile:
        return False

    rp = uuid.UUID(profile['allocation'])
    host_agents = self._possible_agents_for_port(context)

    # Collect the agents (keyed by agent id) whose reported devices map
    # to the allocated resource provider UUID.
    reported = {}
    for agent in host_agents:
        for device in agent['configurations'].get(
                'resource_provider_bandwidths', {}):
            device_rp_uuid = place_utils.device_resource_provider_uuid(
                namespace=uuid_ns,
                host=agent['host'],
                device=device)
            if device_rp_uuid == rp:
                reported[agent['id']] = agent

    if len(reported) == 1:
        agent = list(reported.values())[0]
        LOG.debug("Agent %(agent)s of type %(agent_type)s reports to be "
                  "responsible for resource provider %(rsc_provider)s",
                  {'agent': agent['id'],
                   'agent_type': agent['agent_type'],
                   'rsc_provider': rp})
        return True
    elif len(reported) > 1:
        # Deployment misconfiguration: at most one agent per host may own
        # a given device resource provider.
        LOG.error("Agent misconfiguration, multiple agents on the same "
                  "host %(host)s report being responsible for resource "
                  "provider %(rsc_provider)s: %(agents)s",
                  {'host': context.current['binding:host_id'],
                   'rsc_provider': rp,
                   # sorted list for a deterministic, readable log line
                   # (dict_keys repr is noisy on py3)
                   'agents': sorted(reported.keys())})
        return False
    else:
        # not responsible, must be somebody else
        return False
@six.add_metaclass(abc.ABCMeta)
|
||||
class SimpleAgentMechanismDriverBase(AgentMechanismDriverBase):
|
||||
|
@ -22,6 +22,7 @@ from neutron_lib import constants
|
||||
from neutron_lib.db import api as db_api
|
||||
from neutron_lib import exceptions as exc
|
||||
from neutron_lib.exceptions import multiprovidernet as mpnet_exc
|
||||
from neutron_lib.exceptions import placement as place_exc
|
||||
from neutron_lib.exceptions import vlantransparent as vlan_exc
|
||||
from neutron_lib.plugins.ml2 import api
|
||||
from oslo_config import cfg
|
||||
@ -757,12 +758,18 @@ class MechanismManager(stevedore.named.NamedExtensionManager):
|
||||
'vnic_type': binding.vnic_type,
|
||||
'segments': context.network.network_segments})
|
||||
|
||||
def _bind_port_level(self, context, level, segments_to_bind):
|
||||
def _bind_port_level(self, context, level, segments_to_bind,
|
||||
drivers=None, redoing_bottom=False):
|
||||
if drivers is None:
|
||||
drivers = self.ordered_mech_drivers
|
||||
|
||||
binding = context._binding
|
||||
port_id = context.current['id']
|
||||
LOG.debug("Attempting to bind port %(port)s on host %(host)s "
|
||||
"at level %(level)s using segments %(segments)s",
|
||||
LOG.debug("Attempting to bind port %(port)s by drivers %(drivers)s "
|
||||
"on host %(host)s at level %(level)s using "
|
||||
"segments %(segments)s",
|
||||
{'port': port_id,
|
||||
'drivers': ','.join([driver.name for driver in drivers]),
|
||||
'host': context.host,
|
||||
'level': level,
|
||||
'segments': segments_to_bind})
|
||||
@ -774,7 +781,7 @@ class MechanismManager(stevedore.named.NamedExtensionManager):
|
||||
'host': context.host})
|
||||
return False
|
||||
|
||||
for driver in self.ordered_mech_drivers:
|
||||
for driver in drivers:
|
||||
if not self._check_driver_to_bind(driver, segments_to_bind,
|
||||
context._binding_levels):
|
||||
continue
|
||||
@ -806,6 +813,61 @@ class MechanismManager(stevedore.named.NamedExtensionManager):
|
||||
'lvl': level + 1})
|
||||
context._pop_binding_level()
|
||||
else:
|
||||
# NOTE(bence romsics): Consider: "In case of
|
||||
# hierarchical port binding binding_profile.allocation
|
||||
# [decided and sent by Placement and Nova]
|
||||
# is meant to drive the binding only on the binding
|
||||
# level that represents the closest physical interface
|
||||
# to the nova server." Link to spec:
|
||||
#
|
||||
# https://review.openstack.org/#/c/508149/14/specs\
|
||||
# /rocky/minimum-bandwidth-\
|
||||
# allocation-placement-api.rst@582
|
||||
#
|
||||
# But we cannot tell if a binding level is
|
||||
# the bottom binding level before set_binding()
|
||||
# gets called, and that's already too late. So we
|
||||
# must undo the last binding after set_binding()
|
||||
# was called and redo the last level trying to
|
||||
# bind only with one driver as inferred from
|
||||
# the allocation. In order to undo the binding
|
||||
# here we must also assume that each driver's
|
||||
# bind_port() implementation is side effect free
|
||||
# beyond calling set_binding().
|
||||
#
|
||||
# Also please note that technically we allow for
|
||||
# a redo to call continue_binding() instead of
|
||||
# set_binding() and by that turn what was supposed
|
||||
# to be the bottom level into a non-bottom binding
|
||||
# level. A thorough discussion is recommended if
|
||||
# you think of taking advantage of this.
|
||||
#
|
||||
# Also if we find use cases requiring
|
||||
# diamond-shaped selections of drivers on different
|
||||
# levels (eg. driverA and driverB can be both
|
||||
# a valid choice on level 0, but on level 1 both
|
||||
# previous choice leads to driverC) then we need
|
||||
# to restrict segment selection too based on
|
||||
# traits of the allocated resource provider on
|
||||
# the top binding_level (==0).
|
||||
if (context.current['binding:profile'] is not None and
|
||||
'allocation' in context.current[
|
||||
'binding:profile'] and
|
||||
not redoing_bottom):
|
||||
LOG.debug(
|
||||
"Undo bottom bound level and redo it "
|
||||
"according to binding_profile.allocation, "
|
||||
"resource provider uuid: %s",
|
||||
context.current[
|
||||
'binding:profile']['allocation'])
|
||||
context._pop_binding_level()
|
||||
context._unset_binding()
|
||||
return self._bind_port_level(
|
||||
context, level, segments_to_bind,
|
||||
drivers=[self._infer_driver_from_allocation(
|
||||
context)],
|
||||
redoing_bottom=True)
|
||||
|
||||
# Binding complete.
|
||||
LOG.debug("Bound port: %(port)s, "
|
||||
"host: %(host)s, "
|
||||
@ -823,6 +885,59 @@ class MechanismManager(stevedore.named.NamedExtensionManager):
|
||||
"bind_port",
|
||||
driver.name)
|
||||
|
||||
def _infer_driver_from_allocation(self, context):
    """Choose mechanism driver as implied by allocation in placement.

    :param context: PortContext instance describing the port
    :returns: a single MechanismDriver instance

    Ports allocated to a resource provider (ie. a physical network
    interface) in Placement have the UUID of the provider in their
    binding:profile.allocation. The choice of a physical network
    interface (as recorded in the allocation) implies a choice of
    mechanism driver too. When an allocation was received we expect
    exactly one mechanism driver to be responsible for that physical
    network interface resource provider.
    """
    drivers = [
        driver for driver in self.ordered_mech_drivers
        if driver.obj.responsible_for_ports_allocation(context)]

    if not drivers:
        LOG.error("Failed to bind port %(port)s on host "
                  "%(host)s allocated on resource provider "
                  "%(rsc_provider)s, because no mechanism driver "
                  "reports being responsible",
                  {'port': context.current['id'],
                   'host': context.host,
                   'rsc_provider': context.current[
                       'binding:profile']['allocation']})
        raise place_exc.UnknownResourceProvider(
            rsc_provider=context.current['binding:profile']['allocation'])

    if len(drivers) > 1:
        raise place_exc.AmbiguousResponsibilityForResourceProvider(
            rsc_provider=context.current['binding:profile']['allocation'],
            drivers=','.join([driver.name for driver in drivers]))

    # NOTE(bence romsics): The error conditions for raising either
    # UnknownResourceProvider or AmbiguousResponsibilityForResourceProvider
    # are pretty static therefore the usual 10-times-retry of a binding
    # failure could easily be unnecessary in those cases. However at this
    # point special handling of these exceptions in the binding retry loop
    # seems like premature optimization to me since these exceptions are
    # always a sign of a misconfigured neutron deployment.

    LOG.debug("Restricting possible bindings of port %(port)s "
              "(as inferred from placement allocation) to "
              "mechanism driver '%(driver)s'",
              {'port': context.current['id'],
               'driver': drivers[0].name})

    return drivers[0]
||||
def is_host_filtering_supported(self):
    """Report whether every registered mechanism driver can filter hosts.

    Host filtering is only usable when all drivers support it.
    """
    for driver in self.ordered_mech_drivers:
        if not driver.obj.is_host_filtering_supported():
            return False
    return True
|
@ -246,6 +246,22 @@ class AgentMechanismGenericTestCase(AgentMechanismBaseTestCase):
|
||||
self.driver.bind_port(context)
|
||||
self._check_unbound(context)
|
||||
|
||||
def test_driver_not_responsible_for_ports_allocation(self):
    """Without an 'allocation' key in the binding profile the driver
    must report not being responsible.
    """
    agents = [
        {'configurations': {'rp_bandwidths': {'eth0': {}}},
         'host': 'host'},
    ]
    port_ctx = FakePortContext(
        self.AGENT_TYPE,
        agents,
        [],  # segments are irrelevant for this check
        vnic_type=portbindings.VNIC_DIRECT,
        profile={})  # empty profile: no allocation present
    self.assertFalse(
        self.driver.responsible_for_ports_allocation(port_ctx))
||||
class AgentMechanismLocalTestCase(AgentMechanismBaseTestCase):
|
||||
LOCAL_SEGMENTS = [{api.ID: 'unknown_segment_id',
|
||||
|
@ -60,3 +60,7 @@ class FakeAgentMechanismDriver(mech_agent.SimpleAgentMechanismDriverBase):
|
||||
|
||||
def get_mappings(self, agent):
|
||||
return dict(agent['configurations'].get('interface_mappings', {}))
|
||||
|
||||
|
||||
class AnotherFakeAgentMechanismDriver(FakeAgentMechanismDriver):
    # Behaviorally identical to FakeAgentMechanismDriver; exists only so
    # tests can load two distinct mechanism drivers at once (e.g. to
    # provoke ambiguous responsibility for a resource provider).
    pass
|
@ -87,6 +87,28 @@ class SriovSwitchMechGenericTestCase(SriovNicSwitchMechanismBaseTestCase,
|
||||
segment = {api.NETWORK_TYPE: network_type}
|
||||
self.assertTrue(self.driver.check_segment_for_agent(segment))
|
||||
|
||||
def test_driver_responsible_for_ports_allocation(self):
    """With an allocation matching the agent's device the SR-IOV driver
    must report being responsible.
    """
    agents = [
        {'agent_type': 'NIC Switch agent',
         'configurations': {'resource_provider_bandwidths': {'eth0': {}}},
         'host': 'host',
         'id': '1'}
    ]
    # uuid -v5 87f1895c-73bb-11e8-9008-c4d987b2a692 host:eth0
    profile = {'allocation': '5762cf50-781b-5f01-8ebc-0cce8c9e74cd'}

    port_ctx = base.FakePortContext(
        self.AGENT_TYPE,
        agents,
        [],  # segments are irrelevant here
        vnic_type=portbindings.VNIC_DIRECT,
        profile=profile)
    with mock.patch.object(self.driver, '_possible_agents_for_port',
                           return_value=agents):
        self.assertTrue(
            self.driver.responsible_for_ports_allocation(port_ctx))
|
||||
class SriovMechVlanTestCase(SriovNicSwitchMechanismBaseTestCase,
|
||||
base.AgentMechanismBaseTestCase):
|
||||
|
@ -166,7 +166,27 @@ class OpenvswitchMechanismHybridPlugTestCase(OpenvswitchMechanismBaseTestCase):
|
||||
|
||||
class OpenvswitchMechanismGenericTestCase(OpenvswitchMechanismBaseTestCase,
|
||||
base.AgentMechanismGenericTestCase):
|
||||
pass
|
||||
def test_driver_responsible_for_ports_allocation(self):
    """With an allocation matching the agent's device the OVS driver
    must report being responsible.
    """
    agents = [
        {'agent_type': 'Open vSwitch agent',
         'configurations': {'resource_provider_bandwidths': {'eth0': {}}},
         'id': '1',
         'host': 'host'}
    ]
    # uuid -v5 87ee7d5c-73bb-11e8-9008-c4d987b2a692 host:eth0
    profile = {'allocation': '13cc0ed9-e802-5eaa-b4c7-3441855e31f2'}

    port_ctx = base.FakePortContext(
        self.AGENT_TYPE,
        agents,
        [],  # segments are irrelevant here
        vnic_type=portbindings.VNIC_NORMAL,
        profile=profile)
    with mock.patch.object(self.driver, '_possible_agents_for_port',
                           return_value=agents):
        self.assertTrue(
            self.driver.responsible_for_ports_allocation(port_ctx))
|
||||
class OpenvswitchMechanismLocalTestCase(OpenvswitchMechanismBaseTestCase,
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
import mock
|
||||
|
||||
from neutron_lib.exceptions import placement as place_exc
|
||||
from neutron_lib.plugins.ml2 import api
|
||||
from oslo_config import cfg
|
||||
from oslo_db import exception as db_exc
|
||||
@ -68,6 +69,60 @@ class TestManagers(base.BaseTestCase):
|
||||
manager._bind_port_level(self.context, 0, self.segments_to_bind)
|
||||
self.assertEqual(0, bind_port.call_count)
|
||||
|
||||
def test__infer_driver_from_allocation_positive(self):
    """When exactly one driver reports responsibility it is returned."""
    cfg.CONF.set_override(
        'mechanism_drivers', ['fake_agent'], group='ml2')
    manager = managers.MechanismManager()
    port_ctx = FakePortContext(
        None,
        None,
        self.segments_to_bind,
        profile={'allocation': 'fake_resource_provider'})
    with mock.patch.object(mech_fake_agent.FakeAgentMechanismDriver,
                           'responsible_for_ports_allocation',
                           return_value=True):
        chosen = manager._infer_driver_from_allocation(port_ctx)
    self.assertEqual(chosen.name, 'fake_agent')
||||
def test__infer_driver_from_allocation_negative(self):
    """When no driver reports responsibility the lookup must raise."""
    cfg.CONF.set_override(
        'mechanism_drivers', ['fake_agent'], group='ml2')
    manager = managers.MechanismManager()
    port_ctx = FakePortContext(
        None,
        None,
        self.segments_to_bind,
        profile={'allocation': 'fake_resource_provider'})
    with mock.patch.object(mech_fake_agent.FakeAgentMechanismDriver,
                           'responsible_for_ports_allocation',
                           return_value=False):
        self.assertRaises(
            place_exc.UnknownResourceProvider,
            manager._infer_driver_from_allocation,
            port_ctx)
||||
def test__infer_driver_from_allocation_ambiguous(self):
    """Two drivers claiming the same resource provider must raise.

    Multiple mechanism drivers reporting responsibility for one
    resource provider is a deployment misconfiguration, signalled by
    AmbiguousResponsibilityForResourceProvider.
    """
    cfg.CONF.set_override(
        'mechanism_drivers',
        ['fake_agent', 'another_fake_agent'],
        group='ml2')
    manager = managers.MechanismManager()
    # Both loaded drivers are patched to claim responsibility.
    with mock.patch.object(mech_fake_agent.FakeAgentMechanismDriver,
                           'responsible_for_ports_allocation',
                           return_value=True), \
            mock.patch.object(mech_fake_agent.AnotherFakeAgentMechanismDriver,
                              'responsible_for_ports_allocation',
                              return_value=True):
        self.assertRaises(
            place_exc.AmbiguousResponsibilityForResourceProvider,
            manager._infer_driver_from_allocation,
            FakePortContext(
                None,
                None,
                self.segments_to_bind,
                profile={'allocation': 'fake_resource_provider'})
        )
||||
@mock.patch.object(managers.LOG, 'critical')
|
||||
@mock.patch.object(managers.MechanismManager, '_driver_not_loaded')
|
||||
def test__driver_not_found(self, mock_not_loaded, mock_log):
|
||||
|
@ -640,3 +640,20 @@ class ExtendedPortBindingTestCase(test_plugin.NeutronDbPluginV2TestCase):
|
||||
port, new_binding = self._create_port_and_binding()
|
||||
response = self._delete_port_binding(port['id'], 'other-host')
|
||||
self.assertEqual(webob.exc.HTTPNotFound.code, response.status_int)
|
||||
|
||||
def test_binding_fail_for_unknown_allocation(self):
    """Binding with an allocation unknown to every driver must fail."""
    # The UUID is a random one - which of course is unknown to neutron
    # as a resource provider UUID.
    kwargs = {pbe_ext.PROFILE: {
        'allocation': 'ccccbb4c-2adf-11e9-91bc-db7063775d06'}}
    device_owner = const.DEVICE_OWNER_COMPUTE_PREFIX + 'nova'

    with self.port(device_owner=device_owner) as port:
        response = self._create_port_binding(
            self.fmt, port['port']['id'], self.host, **kwargs)

    self.assertEqual(webob.exc.HTTPInternalServerError.code,
                     response.status_int)
    self.assertIn(exceptions.PortBindingError.__name__, response.text)
|
@ -92,6 +92,7 @@ neutron.ml2.mechanism_drivers =
|
||||
l2population = neutron.plugins.ml2.drivers.l2pop.mech_driver:L2populationMechanismDriver
|
||||
sriovnicswitch = neutron.plugins.ml2.drivers.mech_sriov.mech_driver.mech_driver:SriovNicSwitchMechanismDriver
|
||||
fake_agent = neutron.tests.unit.plugins.ml2.drivers.mech_fake_agent:FakeAgentMechanismDriver
|
||||
another_fake_agent = neutron.tests.unit.plugins.ml2.drivers.mech_fake_agent:AnotherFakeAgentMechanismDriver
|
||||
faulty_agent = neutron.tests.unit.plugins.ml2.drivers.mech_faulty_agent:FaultyAgentMechanismDriver
|
||||
neutron.ml2.extension_drivers =
|
||||
test = neutron.tests.unit.plugins.ml2.drivers.ext_test:TestExtensionDriver
|
||||
|
Loading…
Reference in New Issue
Block a user