Filter out unsatisfied routers in SQL

We saw auto_schedule_routers take over 40 seconds
in a DVR-enabled environment with the option
auto_schedule_routers enabled.

Add new arguments to get_router_agents_count and
handle routers separately depending on whether
they are regular or HA routers. The benefit is that
we no longer need to loop over every router available in
the environment. Another reason for doing this is that
get_router_agents_count is used solely to heal
routers with fewer than the required agents, so the number of
routers with too few agents is small most of the time.

Related-Bug: #1973656

Change-Id: Ic29275815a8c32cee7a6470509687a18fa594514
This commit is contained in:
ushen 2022-05-17 18:50:44 +08:00
parent b73399fa74
commit d4654e3011
4 changed files with 46 additions and 11 deletions

View File

@ -397,12 +397,12 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
return {'agents': [self._make_agent_dict(agent)
for agent in agents]}
def get_routers_l3_agents_count(self, context):
def get_routers_l3_agents_count(self, context, ha=False, less_than=0):
"""Return a map between routers and agent counts for all routers."""
# TODO(sshank): This portion needs Router OVO integration when it is
# merged.
l3_model_list = l3_objs.RouterExtraAttributes.get_router_agents_count(
context)
context, ha=ha, less_than=less_than)
return [(self._make_router_dict(router_model),
agent_count if agent_count else 0)
for router_model, agent_count in l3_model_list]

View File

@ -109,7 +109,7 @@ class RouterExtraAttributes(base.NeutronDbObject):
@classmethod
@db_api.CONTEXT_READER
def get_router_agents_count(cls, context):
def get_router_agents_count(cls, context, ha=False, less_than=0):
# TODO(sshank): This is pulled out from l3_agentschedulers_db.py
# until a way to handle joins is figured out.
binding_model = rb_model.RouterL3AgentBinding
@ -121,9 +121,12 @@ class RouterExtraAttributes(base.NeutronDbObject):
l3_attrs.RouterExtraAttributes.router_id).
join(l3.Router).
group_by(binding_model.router_id).subquery())
query = (context.session.query(l3.Router, sub_query.c.count).
outerjoin(sub_query))
count = func.coalesce(sub_query.c.count, 0)
query = (context.session.query(l3.Router, count).
outerjoin(sub_query).join(l3_attrs.RouterExtraAttributes).
filter(l3_attrs.RouterExtraAttributes.ha == ha))
if less_than > 0:
query = query.filter(count < less_than)
return list(query)

View File

@ -103,11 +103,18 @@ class L3Scheduler(object, metaclass=abc.ABCMeta):
underscheduled_routers = []
max_agents_for_ha = plugin.get_number_of_agents_for_scheduling(context)
for router, count in plugin.get_routers_l3_agents_count(context):
if (count < 1 or
router.get('ha', False) and count < max_agents_for_ha):
# Either the router was un-scheduled (scheduled to 0 agents),
# or it's an HA router and it was under-scheduled (scheduled to
# Since working out a unified SQL query for both regular and
# HA routers is hard, split it up and run the queries separately.
for router, count in plugin.get_routers_l3_agents_count(
context, ha=False, less_than=1):
if count < 1:
# the router was un-scheduled (scheduled to 0 agents),
underscheduled_routers.append(router)
for router, count in plugin.get_routers_l3_agents_count(
context, ha=True, less_than=max_agents_for_ha):
if count < max_agents_for_ha:
# it's an HA router and it was under-scheduled (scheduled to
# less than max_agents_for_ha). Either way, it should be added
# to the list of routers we want to handle.
underscheduled_routers.append(router)

View File

@ -1640,6 +1640,8 @@ class L3_HA_scheduler_db_mixinTestCase(L3HATestCaseMixin):
router2 = self._create_ha_router()
router3 = self._create_ha_router(ha=False)
result = self.plugin.get_routers_l3_agents_count(self.adminContext)
result += self.plugin.get_routers_l3_agents_count(
self.adminContext, ha=True)
self.assertEqual(3, len(result))
check_result = [(router['id'], agents) for router, agents in result]
@ -1647,6 +1649,29 @@ class L3_HA_scheduler_db_mixinTestCase(L3HATestCaseMixin):
self.assertIn((router2['id'], 2), check_result)
self.assertIn((router3['id'], 0), check_result)
result = self.plugin.get_routers_l3_agents_count(self.adminContext,
ha=True, less_than=3)
check_result = [(router['id'], agents) for router, agents in result]
self.assertIn((router2['id'], 2), check_result)
def test_get_routers_not_ha_l3_agents_count(self):
    """Check agent counts reported for non-HA routers.

    Two non-HA routers are created and only the first one is scheduled
    to an agent. The unfiltered query must report both routers with
    their counts; the query with ``less_than=1`` must contain the
    unscheduled router and must not contain the scheduled one.
    """
    scheduled = self._create_ha_router(ha=False)
    unscheduled = self._create_ha_router(ha=False)
    self.plugin.schedule_router(
        self.adminContext, scheduled['id'], candidates=[self.agent1])

    # Default arguments: both routers come back, each with its count.
    all_counts = self.plugin.get_routers_l3_agents_count(self.adminContext)
    self.assertEqual(2, len(all_counts))
    id_count_pairs = [
        (router['id'], agents) for router, agents in all_counts]
    self.assertIn((scheduled['id'], 1), id_count_pairs)
    self.assertIn((unscheduled['id'], 0), id_count_pairs)

    # With a threshold of one agent, the scheduled router is left out.
    below_threshold = self.plugin.get_routers_l3_agents_count(
        self.adminContext, less_than=1)
    id_count_pairs = [
        (router['id'], agents) for router, agents in below_threshold]
    self.assertIn((unscheduled['id'], 0), id_count_pairs)
    self.assertNotIn((scheduled['id'], 1), id_count_pairs)
def test_get_ordered_l3_agents_by_num_routers(self):
# Mock scheduling so that the test can control it explicitly
mock.patch.object(l3_hamode_db.L3_HA_NAT_db_mixin,