Send update instead of remove for DVR reschedule

The current "agent remove-add" method to reschedule a router is not
friendly for DVR. In DVR cases, the old agent may still need to keep the
router in a non-master role to service any VM ports that exit on that
host while the new agent handles the master role of the router.

This patch proposes to send "update" instead of "remove" notification
for DVR rescheduling, which aligns with the retain_router check in
remove_router_from_l3_agent as well.

Closes-Bug: #1781179
Change-Id: I23431f9f46f72e6bce91f3d1fb0ed328d55930fb
This commit is contained in:
Kailun Qin 2018-07-12 03:18:23 +08:00
parent 5db3979581
commit 8b16b53b5b
2 changed files with 82 additions and 9 deletions

View File

@ -173,6 +173,24 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
l3_notifier.router_added_to_agent( l3_notifier.router_added_to_agent(
context, [router_id], agent.host) context, [router_id], agent.host)
def _check_router_retain_needed(self, context, router, host):
"""Check whether a router needs to be retained on a host.
Check whether there are DVR serviceable ports owned by the host of
a l3 agent. If so, then the routers should be retained.
"""
if not host:
return False
retain_router = False
if router.get('distributed'):
plugin = directory.get_plugin(plugin_constants.L3)
subnet_ids = plugin.get_subnet_ids_on_router(context, router['id'])
if subnet_ids:
retain_router = plugin._check_dvr_serviceable_ports_on_host(
context, host, subnet_ids)
return retain_router
def remove_router_from_l3_agent(self, context, agent_id, router_id): def remove_router_from_l3_agent(self, context, agent_id, router_id):
"""Remove the router from l3 agent. """Remove the router from l3 agent.
@ -187,19 +205,15 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
self._unbind_router(context, router_id, agent_id) self._unbind_router(context, router_id, agent_id)
router = self.get_router(context, router_id) router = self.get_router(context, router_id)
plugin = directory.get_plugin(plugin_constants.L3)
if router.get('ha'): if router.get('ha'):
plugin = directory.get_plugin(plugin_constants.L3)
plugin.delete_ha_interfaces_on_host(context, router_id, agent.host) plugin.delete_ha_interfaces_on_host(context, router_id, agent.host)
# NOTE(Swami): Need to verify if there are DVR serviceable # NOTE(Swami): Need to verify if there are DVR serviceable
# ports owned by this agent. If owned by this agent, then # ports owned by this agent. If owned by this agent, then
# the routers should be retained. This flag will be used # the routers should be retained. This flag will be used
# to check if there are valid routers in this agent. # to check if there are valid routers in this agent.
retain_router = False retain_router = self._check_router_retain_needed(context, router,
if router.get('distributed'): agent.host)
subnet_ids = plugin.get_subnet_ids_on_router(context, router_id)
if subnet_ids and agent.host:
retain_router = plugin._check_dvr_serviceable_ports_on_host(
context, agent.host, subnet_ids)
l3_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_L3) l3_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_L3)
if retain_router and l3_notifier: if retain_router and l3_notifier:
l3_notifier.routers_updated_on_host( l3_notifier.routers_updated_on_host(
@ -247,9 +261,16 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
old_hosts = [agent['host'] for agent in old_agents] old_hosts = [agent['host'] for agent in old_agents]
new_hosts = [agent['host'] for agent in new_agents] new_hosts = [agent['host'] for agent in new_agents]
router = self.get_router(context, router_id)
for host in set(old_hosts) - set(new_hosts): for host in set(old_hosts) - set(new_hosts):
l3_notifier.router_removed_from_agent( retain_router = self._check_router_retain_needed(context,
context, router_id, host) router, host)
if retain_router:
l3_notifier.routers_updated_on_host(
context, [router_id], host)
else:
l3_notifier.router_removed_from_agent(
context, router_id, host)
for agent in new_agents: for agent in new_agents:
try: try:

View File

@ -856,6 +856,58 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
router['router']['id'])['agents'] router['router']['id'])['agents']
self.assertEqual(0, len(l3_agents)) self.assertEqual(0, len(l3_agents))
def test_router_reschedule_no_remove_if_agent_has_dvr_service_ports(self):
l3_notifier = self.l3plugin.agent_notifiers[constants.AGENT_TYPE_L3]
agent_a = helpers.register_l3_agent(
host=L3_HOSTA, agent_mode=constants.L3_AGENT_MODE_DVR_SNAT)
agent_b = helpers.register_l3_agent(
host=L3_HOSTB, agent_mode=constants.L3_AGENT_MODE_DVR_SNAT)
with self.subnet() as s, \
mock.patch.object(l3_notifier.client, 'prepare',
return_value=l3_notifier.client) as mock_prepare, \
mock.patch.object(l3_notifier.client, 'cast') as mock_cast, \
mock.patch.object(l3_notifier.client, 'call'):
net_id = s['subnet']['network_id']
self._set_net_external(net_id)
router = {'name': 'router1',
'external_gateway_info': {'network_id': net_id},
'tenant_id': 'tenant_id',
'admin_state_up': True,
'distributed': True}
r = self.l3plugin.create_router(self.adminContext,
{'router': router})
# schedule the dvr to one of the agents
self.l3plugin.schedule_router(self.adminContext, r['id'])
l3agents = self.l3plugin.list_l3_agents_hosting_router(
self.adminContext, r['id'])
agent = l3agents['agents'][0]
# emulating dvr serviceable ports exist on the host
with mock.patch.object(
self.l3plugin, '_check_dvr_serviceable_ports_on_host') \
as ports_exist:
ports_exist.return_value = True
# reschedule the dvr to one of the other agent
candidate_agent = (agent_b if agent['host'] == L3_HOSTA
else agent_a)
self.l3plugin.reschedule_router(self.adminContext, r['id'],
candidates=[candidate_agent])
# make sure dvr serviceable ports are checked when rescheduling
self.assertTrue(ports_exist.called)
# make sure sending update instead of removing for dvr
mock_prepare.assert_called_with(server=candidate_agent['host'])
mock_cast.assert_called_with(
mock.ANY, 'routers_updated',
routers=[r['id']])
# make sure the rescheduling completes
l3agents = self.l3plugin.list_l3_agents_hosting_router(
self.adminContext, r['id'])
self.assertEqual(1, len(l3agents['agents']))
new_agent_host = l3agents['agents'][0]['host']
self.assertNotEqual(agent['host'], new_agent_host)
def test_router_auto_schedule_with_invalid_router(self): def test_router_auto_schedule_with_invalid_router(self):
with self.router() as router: with self.router() as router:
l3_rpc_cb = l3_rpc.L3RpcCallback() l3_rpc_cb = l3_rpc.L3RpcCallback()