DVR:Pro-active router creation with live migration

Today DVR routers are created after a dvr service port is
seen on a given node. But in the case of instance live
migration, the creation of l3 routed networks on the
destination node is delayed since we react to the event.

This patch tries to proactively create routers on the
destination node based on the portbinding profile info
updated by Nova when the instance is in a pre-migration
state.

Nova calls setup_networks_on_host during the pre-migration
phase, and we update the portbinding profile dict with
an attribute 'migrating_to' as shown below

port:{'binding:profile':{'migrating_to': 'host'}}

where 'host' points to the 'destination' of the port.

L3 plugin will verify the migration profile for the port on
any port update and then take action to create routers in the
respective agents if routers have not been created.

If the live migration fails or is reverted, then the port
binding profile attribute 'migrating_to' will be cleared from
the port profile. In this case, the router and the fip namespace
may be created on the destination node, but since the VM did
not land on the destination node, it would not cause any issues,
since the traffic will still be flowing out from the originating
node, except for the existence of the router and fip namespace.

If, for some reason, the creation of the router namespace and fip
namespace fails, then the live migration may still proceed as
it does now, and the agent will create the router namespace and fip
namespace reactively.

The case where we report status back to Nova, and Nova reacts
to the setup_networks_on_host status, will be handled in the
upcoming release.

This patch should not affect any upgrades with respect to the
agent or server.

Change-Id: Ibb62f012333cfdfd468bafdc0b4501aa46b4b54d
Related-Bug: #1456073
This commit is contained in:
Swaminathan Vasudevan 2016-02-02 12:44:16 -08:00
parent 412012de59
commit f0bdb798fa
6 changed files with 225 additions and 18 deletions

View File

@ -474,7 +474,8 @@ class L3_NAT_with_dvr_db_mixin(l3_db.L3_NAT_db_mixin,
if router:
router_floatingips = router.get(l3_const.FLOATINGIP_KEY, [])
if router['distributed']:
if floating_ip.get('host', None) != host:
if (floating_ip.get('host', None) != host and
floating_ip.get('dest_host') is None):
continue
LOG.debug("Floating IP host: %s", floating_ip['host'])
router_floatingips.append(floating_ip)
@ -507,16 +508,29 @@ class L3_NAT_with_dvr_db_mixin(l3_db.L3_NAT_db_mixin,
floating_ip_port_ids = [fip['port_id'] for fip in floating_ips
if fip['router_id'] in dvr_router_ids]
if floating_ip_port_ids:
port_filter = {portbindings.HOST_ID: [host],
'id': floating_ip_port_ids}
port_filter = {'id': floating_ip_port_ids}
ports = self._core_plugin.get_ports(context, port_filter)
port_dict = dict((port['id'], port) for port in ports)
port_dict = {}
for port in ports:
# Make sure that we check for cases where the port
# might be in a pre-live migration state or also
# check for the portbinding profile 'migrating_to'
# key for the host.
port_profile = port.get(portbindings.PROFILE)
port_in_migration = (
port_profile and
port_profile.get('migrating_to') == host)
if (port[portbindings.HOST_ID] == host or port_in_migration):
port_dict.update({port['id']: port})
# Add the port binding host to the floatingip dictionary
for fip in floating_ips:
vm_port = port_dict.get(fip['port_id'], None)
if vm_port:
fip['host'] = self._get_dvr_service_port_hostid(
context, fip['port_id'], port=vm_port)
fip['dest_host'] = (
self._get_dvr_migrating_service_port_hostid(
context, fip['port_id'], port=vm_port))
routers_dict = self._process_routers(context, routers)
self._process_floating_ips_dvr(context, routers_dict,
floating_ips, host, agent)
@ -540,6 +554,18 @@ class L3_NAT_with_dvr_db_mixin(l3_db.L3_NAT_db_mixin,
if n_utils.is_dvr_serviced(device_owner):
return port_db[portbindings.HOST_ID]
def _get_dvr_migrating_service_port_hostid(
self, context, port_id, port=None):
"""Returns the migrating host_id from the migrating profile."""
port_db = port or self._core_plugin.get_port(context, port_id)
port_profile = port_db.get(portbindings.PROFILE)
port_dest_host = None
if port_profile:
port_dest_host = port_profile.get('migrating_to')
device_owner = port_db['device_owner'] if port_db else ""
if n_utils.is_dvr_serviced(device_owner):
return port_dest_host
def _get_agent_gw_ports_exist_for_network(
self, context, network_id, host, agent_id):
"""Return agent gw port if exist, or None otherwise."""
@ -570,6 +596,20 @@ class L3_NAT_with_dvr_db_mixin(l3_db.L3_NAT_db_mixin,
if host_id:
return
def check_for_fip_and_create_agent_gw_port_on_host_if_not_exists(
self, context, port, host):
"""Create fip agent_gw_port on host if not exists"""
fip = self._get_floatingip_on_port(context, port_id=port['id'])
if not fip:
return
network_id = fip.get('floating_network_id')
agent_gw_port = self.create_fip_agent_gw_port_if_not_exists(
context.elevated(), network_id, host)
LOG.debug("Port-in-Migration: Floatingip Agent Gateway port "
"%(gw)s created for the future host: %(dest_host)s",
{'gw': agent_gw_port,
'dest_host': host})
def create_fip_agent_gw_port_if_not_exists(
self, context, network_id, host):
"""Function to return the FIP Agent GW port.
@ -790,8 +830,13 @@ class L3_NAT_with_dvr_db_mixin(l3_db.L3_NAT_db_mixin,
if is_distributed_router(router):
host = self._get_dvr_service_port_hostid(context, fixed_port_id)
dest_host = self._get_dvr_migrating_service_port_hostid(
context, fixed_port_id)
self.l3_rpc_notifier.routers_updated_on_host(
context, [router_id], host)
if dest_host and dest_host != host:
self.l3_rpc_notifier.routers_updated_on_host(
context, [router_id], dest_host)
else:
self.notify_router_updated(context, router_id)

View File

@ -75,19 +75,32 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
the state of the router and the Compute Nodes.
"""
def dvr_handle_new_service_port(self, context, port):
def dvr_handle_new_service_port(self, context, port, dest_host=None):
"""Handle new dvr service port creation.
When a new dvr service port is created, this function will
schedule a dvr router to new compute node if needed and notify
l3 agent on that node.
The 'dest_host' will provide the destination host of the port in
case of service port migration.
"""
port_host = port[portbindings.HOST_ID]
port_host = dest_host or port[portbindings.HOST_ID]
l3_agent_on_host = (self.get_l3_agents(
context, filters={'host': [port_host]}) or [None])[0]
if not l3_agent_on_host:
return
if dest_host:
# Make sure we create the floatingip agent gateway port
# for the destination node if fip is associated with this
# fixed port
l3plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT)
(
l3plugin.
check_for_fip_and_create_agent_gw_port_on_host_if_not_exists(
context, port, dest_host))
subnet_ids = [ip['subnet_id'] for ip in port['fixed_ips']]
router_ids = self.get_dvr_routers_by_subnet_ids(context, subnet_ids)
if router_ids:
@ -304,17 +317,26 @@ class L3_DVRsch_db_mixin(l3agent_sch_db.L3AgentSchedulerDbMixin):
if not subnet_ids:
return False
core_plugin = manager.NeutronManager.get_plugin()
filters = {'fixed_ips': {'subnet_id': subnet_ids},
portbindings.HOST_ID: [host]}
ports_query = core_plugin._get_ports_query(context, filters=filters)
owner_filter = or_(
Binding = ml2_models.PortBinding
IPAllocation = models_v2.IPAllocation
Port = models_v2.Port
query = context.session.query(Binding)
query = query.join(Binding.port)
query = query.join(Port.fixed_ips)
query = query.filter(
IPAllocation.subnet_id.in_(subnet_ids))
device_filter = or_(
models_v2.Port.device_owner.startswith(
n_const.DEVICE_OWNER_COMPUTE_PREFIX),
models_v2.Port.device_owner.in_(
n_utils.get_other_dvr_serviced_device_owners()))
ports_query = ports_query.filter(owner_filter)
return ports_query.first() is not None
query = query.filter(device_filter)
host_filter = or_(
ml2_models.PortBinding.host == host,
ml2_models.PortBinding.profile.contains(host))
query = query.filter(host_filter)
return query.first() is not None
def _notify_l3_agent_new_port(resource, event, trigger, **kwargs):
@ -352,6 +374,7 @@ def _notify_l3_agent_port_update(resource, event, trigger, **kwargs):
if new_port and original_port:
original_device_owner = original_port.get('device_owner', '')
new_device_owner = new_port.get('device_owner', '')
is_new_device_dvr_serviced = n_utils.is_dvr_serviced(new_device_owner)
l3plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT)
context = kwargs['context']
@ -374,7 +397,7 @@ def _notify_l3_agent_port_update(resource, event, trigger, **kwargs):
}
_notify_port_delete(
event, resource, trigger, **removed_router_args)
if not n_utils.is_dvr_serviced(new_device_owner):
if not is_new_device_dvr_serviced:
return
is_fixed_ips_changed = (
'fixed_ips' in new_port and
@ -384,9 +407,17 @@ def _notify_l3_agent_port_update(resource, event, trigger, **kwargs):
new_port[portbindings.HOST_ID] and
(original_port[portbindings.HOST_ID] !=
new_port[portbindings.HOST_ID]))
if (is_new_port_binding_changed and
n_utils.is_dvr_serviced(new_device_owner)):
l3plugin.dvr_handle_new_service_port(context, new_port)
dest_host = None
new_port_profile = new_port.get(portbindings.PROFILE)
if new_port_profile:
dest_host = new_port_profile.get('migrating_to')
# If dest_host is set, then the port profile has changed
# and this port is in migration. The call below will
# pre-create the router on the new host
if ((is_new_port_binding_changed or dest_host) and
is_new_device_dvr_serviced):
l3plugin.dvr_handle_new_service_port(context, new_port,
dest_host=dest_host)
l3plugin.update_arp_entry_for_dvr_service_port(
context, new_port)
elif kwargs.get('mac_address_updated') or is_fixed_ips_changed:

View File

@ -528,6 +528,85 @@ class L3DvrTestCase(ml2_test_base.ML2TestFramework):
self._test_router_remove_from_agent_on_vm_port_deletion(
non_admin_port=True)
def test_dvr_router_notifications_for_live_migration_with_fip(self):
self._dvr_router_notifications_for_live_migration(
with_floatingip=True)
def test_dvr_router_notifications_for_live_migration_without_fip(self):
self._dvr_router_notifications_for_live_migration()
def _dvr_router_notifications_for_live_migration(
self, with_floatingip=False):
"""Check the router notifications go to the right hosts
with live migration without hostbinding on the port.
"""
# register l3 agents in dvr mode in addition to existing dvr_snat agent
HOST1, HOST2 = 'host1', 'host2'
for host in [HOST1, HOST2]:
helpers.register_l3_agent(
host=host, agent_mode=constants.L3_AGENT_MODE_DVR)
router = self._create_router()
arg_list = (portbindings.HOST_ID,)
with self.subnet() as ext_subnet,\
self.subnet(cidr='20.0.0.0/24') as subnet1,\
self.port(subnet=subnet1,
device_owner=DEVICE_OWNER_COMPUTE,
arg_list=arg_list,
**{portbindings.HOST_ID: HOST1}) as vm_port:
# make net external
ext_net_id = ext_subnet['subnet']['network_id']
self._update('networks', ext_net_id,
{'network': {external_net.EXTERNAL: True}})
# add external gateway to router
self.l3_plugin.update_router(
self.context, router['id'],
{'router': {
'external_gateway_info': {'network_id': ext_net_id}}})
self.l3_plugin.add_router_interface(
self.context, router['id'],
{'subnet_id': subnet1['subnet']['id']})
if with_floatingip:
floating_ip = {'floating_network_id': ext_net_id,
'router_id': router['id'],
'port_id': vm_port['port']['id'],
'tenant_id': vm_port['port']['tenant_id'],
'dns_name': '', 'dns_domain': ''}
floating_ip = self.l3_plugin.create_floatingip(
self.context, {'floatingip': floating_ip})
with mock.patch.object(self.l3_plugin,
'_l3_rpc_notifier') as l3_notifier,\
mock.patch.object(
self.l3_plugin,
'create_fip_agent_gw_port_if_not_exists'
) as fip_agent:
live_migration_port_profile = {
'migrating_to': HOST2
}
# Update the VM Port with Migration portbinding Profile.
# With this change, it should trigger a notification to
# the Destination host to create a Router ahead of time
# before the VM Port binding has changed to HOST2.
updated_port = self.core_plugin.update_port(
self.context, vm_port['port']['id'],
{'port': {
portbindings.PROFILE: live_migration_port_profile}})
l3_notifier.routers_updated_on_host.assert_called_once_with(
self.context, {router['id']}, HOST2)
# Check the port-binding is still with the old HOST1, but
# the router update notification has been sent to the new
# host 'HOST2' based on the live migration profile change.
self.assertEqual(updated_port[portbindings.HOST_ID], HOST1)
self.assertNotEqual(updated_port[portbindings.HOST_ID], HOST2)
if with_floatingip:
fip_agent.return_value = True
# Since we have already created the floatingip for the
# port, it should be creating the floatingip agent gw
# port for the new host if it does not exist.
fip_agent.assert_called_once_with(
mock.ANY, floating_ip['floating_network_id'], HOST2)
def test_router_notifications(self):
"""Check that notifications go to the right hosts in different
conditions

View File

@ -428,6 +428,10 @@ class L3DvrTestCase(test_db_base_plugin_v2.NeutronDbPluginV2TestCase):
with mock.patch.object(self.mixin, 'get_router') as grtr,\
mock.patch.object(self.mixin,
'_get_dvr_service_port_hostid') as vmp,\
mock.patch.object(
self.mixin,
'_get_dvr_migrating_service_port_hostid'
) as mvmp,\
mock.patch.object(
self.mixin,
'create_fip_agent_gw_port_if_not_exists') as c_fip,\
@ -435,6 +439,7 @@ class L3DvrTestCase(test_db_base_plugin_v2.NeutronDbPluginV2TestCase):
'_update_fip_assoc'):
grtr.return_value = router_db
vmp.return_value = 'my-host'
mvmp.return_value = 'my-future-host'
self.mixin._update_fip_assoc(
self.ctx, fip, floatingip_db, port)
return c_fip

View File

@ -943,6 +943,31 @@ class L3DvrSchedulerTestCase(testlib_api.SqlTestCase):
self.assertFalse(
l3plugin.dvr_handle_new_service_port.called)
def test__notify_l3_agent_update_port_with_migration_port_profile(self):
kwargs = {
'context': self.adminContext,
'original_port': {
portbindings.HOST_ID: 'vm-host',
'device_owner': DEVICE_OWNER_COMPUTE,
},
'port': {
portbindings.HOST_ID: 'vm-host',
'device_owner': DEVICE_OWNER_COMPUTE,
portbindings.PROFILE: {'migrating_to': 'vm-host2'},
},
}
l3plugin = mock.Mock()
with mock.patch.object(manager.NeutronManager,
'get_service_plugins',
return_value={'L3_ROUTER_NAT': l3plugin}):
l3_dvrscheduler_db._notify_l3_agent_port_update(
'port', 'after_update', mock.ANY, **kwargs)
l3plugin.dvr_handle_new_service_port.assert_called_once_with(
self.adminContext, kwargs.get('port'), dest_host='vm-host2')
l3plugin.update_arp_entry_for_dvr_service_port.\
assert_called_once_with(
self.adminContext, kwargs.get('port'))
def test__notify_l3_agent_update_port_no_action(self):
kwargs = {
'context': self.adminContext,
@ -1025,7 +1050,7 @@ class L3DvrSchedulerTestCase(testlib_api.SqlTestCase):
self.assertEqual(
1, l3plugin.delete_arp_entry_for_dvr_service_port.call_count)
l3plugin.dvr_handle_new_service_port.assert_called_once_with(
self.adminContext, kwargs.get('port'))
self.adminContext, kwargs.get('port'), dest_host=None)
def test__notify_l3_agent_update_port_removing_routers(self):
port_id = 'fake-port'

View File

@ -0,0 +1,22 @@
---
prelude: >
Fix DVR support for service port live migration.
fixes:
- Create DVR routers pro-actively on the destination
node for migrating dvr service port.
- If the DVR service port has associated floatingip,
then the floatingip namespace would be created on
the destination node.
issues:
- Right now we do not have status update notification
for L3 networks into Nova for nova to take necessary
action on failure to create the L3 networks.
So we do not report back failure and will be handled
in next release. In this case there might be a delay
in creating the routers and fip namespace after the
DVR service port migrates.
- If Nova reverts or cancels the live migration
  after it informs Neutron and before it migrates
the dvr service port, then we do not cleanup the router
and fip namespace that was created on the destination
node.