Browse Source

Tune _get_candidates for faster scheduling in dvr

We have seen performance issues when DVR routers are
scheduled on multiple compute nodes with thousands of
VMs on the routed subnets.

_get_candidates calls get_l3_agent_candidates with the
complete list of active agents, irrespective of whether
the router is already hosted on those agents.

This fix passes only the agents that are not already
hosting the router, which reduces the number of agents
that get_l3_agent_candidates needs to process and
improves control plane performance.

Closes-Bug: #1513678

Conflicts:

	neutron/scheduler/l3_agent_scheduler.py

Change-Id: I8f781d4cbc996ce13441303c9296e4f6ec822b94
(cherry picked from commit 411e6ff157)
tags/7.0.2
Swaminathan Vasudevan 4 years ago
parent
commit
5bbff07beb
2 changed files with 39 additions and 15 deletions
  1. +12
    -15
      neutron/scheduler/l3_agent_scheduler.py
  2. +27
    -0
      neutron/tests/unit/scheduler/test_l3_agent_scheduler.py

+ 12
- 15
neutron/scheduler/l3_agent_scheduler.py View File

@@ -167,33 +167,30 @@ class L3Scheduler(object):
# one enabled l3 agent hosting since active is just a
# timing problem. Non-active l3 agent can return to
# active any time
l3_agents = plugin.get_l3_agents_hosting_routers(
current_l3_agents = plugin.get_l3_agents_hosting_routers(
context, [sync_router['id']], admin_state_up=True)
if l3_agents and not sync_router.get('distributed', False):
is_router_distributed = sync_router.get('distributed', False)
if current_l3_agents and not is_router_distributed:
LOG.debug('Router %(router_id)s has already been hosted'
' by L3 agent %(agent_id)s',
{'router_id': sync_router['id'],
'agent_id': l3_agents[0]['id']})
'agent_id': current_l3_agents[0]['id']})
return []

active_l3_agents = plugin.get_l3_agents(context, active=True)
if not active_l3_agents:
LOG.warn(_LW('No active L3 agents'))
return []
new_l3agents = plugin.get_l3_agent_candidates(context,
sync_router,
active_l3_agents)
old_l3agentset = set(l3_agents)
if sync_router.get('distributed', False):
new_l3agentset = set(new_l3agents)
candidates = list(new_l3agentset - old_l3agentset)
else:
candidates = new_l3agents
if not candidates:
potential_candidates = list(
set(active_l3_agents) - set(current_l3_agents))
new_l3agents = []
if potential_candidates:
new_l3agents = plugin.get_l3_agent_candidates(
context, sync_router, potential_candidates)
if not new_l3agents:
LOG.warn(_LW('No L3 agents can host the router %s'),
sync_router['id'])

return candidates
return new_l3agents

def _bind_routers(self, context, plugin, routers, l3_agent):
for router in routers:

+ 27
- 0
neutron/tests/unit/scheduler/test_l3_agent_scheduler.py View File

@@ -278,6 +278,33 @@ class L3SchedulerBaseTestCase(base.BaseTestCase):
router['distributed'] = True
plugin.get_l3_agents.return_value = []
iter(self.scheduler._get_candidates(plugin, mock.MagicMock(), router))
self.assertFalse(plugin.get_l3_agent_candidates.called)

def test__get_candidates_skips_get_l3_agent_candidates_if_dvr_scheduled(
        self):
    """Candidate lookup is skipped when no unscheduled agent remains.

    The only agent reported by get_l3_agents ('a1') is also the one
    reported as already hosting the distributed router, so the test
    asserts that get_l3_agent_candidates is never called.
    """
    hosting_agents = ['a1']
    dvr_router = {'id': str(uuid.uuid4()), 'distributed': True}
    plugin = mock.MagicMock()
    plugin.get_l3_agents_hosting_routers.return_value = hosting_agents
    # Every agent returned by get_l3_agents already hosts the router.
    plugin.get_l3_agents.return_value = list(hosting_agents)
    self.scheduler._get_candidates(plugin, mock.MagicMock(), dvr_router)
    self.assertFalse(plugin.get_l3_agent_candidates.called)

def test__get_candidates_calls_get_l3_agent_candidates_if_agent_available(
        self):
    """Only agents not yet hosting the router are offered as candidates.

    The distributed router is reported as hosted on 'a1' and 'a2' while
    five agents are available; the test checks that exactly the three
    remaining agents are handed to get_l3_agent_candidates and that its
    result is what _get_candidates returns.
    """
    plugin = mock.MagicMock()
    # distributed router already hosted in two agents 'a1' and 'a2'
    plugin.get_l3_agents_hosting_routers.return_value = ['a1', 'a2']
    router = {'distributed': True, 'id': str(uuid.uuid4())}
    # Available distributed agents
    plugin.get_l3_agents.return_value = ['a1', 'a2', 'a3', 'a4', 'a5']
    unscheduled_agents = ['a3', 'a4', 'a5']
    plugin.get_l3_agent_candidates.return_value = ['a3', 'a4']
    agents_returned = self.scheduler._get_candidates(
        plugin, mock.MagicMock(), router)
    # Use the real assertion: "called_once_with" is not a mock assertion,
    # it is an auto-created MagicMock attribute, so calling it verifies
    # nothing and can never fail.
    plugin.get_l3_agent_candidates.assert_called_once_with(
        mock.ANY, router, mock.ANY)
    # The agent list is built from a set difference, so its order is not
    # defined; compare it sorted instead of matching it positionally.
    offered_agents = plugin.get_l3_agent_candidates.call_args[0][2]
    self.assertEqual(unscheduled_agents, sorted(offered_agents))
    self.assertEqual(['a3', 'a4'], sorted(agents_returned))


class L3SchedulerBaseMixin(object):

Loading…
Cancel
Save