Refactor L3 scheduler (unify code paths)

This patch proposes a (rather major) refactor to the L3 scheduler.
Basically, the auto_schedule_routers() code-path was split to 2
different code-paths, each dealing with a different case (unscheduled
routers vs underscheduled routers), in addition to the API-initiated
schedule() logic. This patch removes the 2 code-paths in favor of moving
most of the logic into schedule(). While the result is a slightly
longer schedule(), the benefit is that a lot of the previous
unmaintainable code-paths of auto_schedule_routers() are now removed.

Yay! :D

Related-Bug: #1609738
Change-Id: I227ca60422545e40d3bbb8baf2b41a8ce14f4294
This commit is contained in:
John Schwarz 2016-10-13 16:10:07 +03:00 committed by Ihar Hrachyshka
parent 4ae6790d82
commit e4b0b9f8be
5 changed files with 88 additions and 252 deletions

View File

@ -33,6 +33,7 @@ from neutron.common import _deprecate
from neutron.common import utils as n_utils
from neutron.db import agentschedulers_db
from neutron.db.models import agent as agent_model
from neutron.db.models import l3 as l3_model
from neutron.db.models import l3_attrs
from neutron.db.models import l3agent as rb_model
from neutron.extensions import l3agentscheduler
@ -399,6 +400,30 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
return {'agents': [self._make_agent_dict(binding.l3_agent) for
binding in bindings]}
def get_routers_l3_agents_count(self, context):
"""Return a map between routers and agent counts for all routers."""
# Postgres requires every column in the select to be present in
# the group by statement when using an aggregate function.
# One solution is to generate a subquery and join it with the desired
# columns.
binding_model = rb_model.RouterL3AgentBinding
sub_query = (context.session.query(
binding_model.router_id,
func.count(binding_model.router_id).label('count')).
join(l3_attrs.RouterExtraAttributes,
binding_model.router_id ==
l3_attrs.RouterExtraAttributes.router_id).
join(l3_model.Router).
group_by(binding_model.router_id).subquery())
query = (context.session.query(l3_model.Router, sub_query.c.count).
outerjoin(sub_query))
return [(self._make_router_dict(router), agent_count) if agent_count
else (self._make_router_dict(router), 0)
for router, agent_count in query]
def get_l3_agents(self, context, active=None, filters=None):
query = context.session.query(agent_model.Agent)
query = query.filter(
@ -469,7 +494,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
candidates.append(l3_agent)
return candidates
def auto_schedule_routers(self, context, host, router_ids):
def auto_schedule_routers(self, context, host, router_ids=None):
if self.router_scheduler:
self.router_scheduler.auto_schedule_routers(
self, context, host, router_ids)

View File

@ -15,47 +15,18 @@
from neutron_lib import constants
from neutron_lib.plugins import directory
from sqlalchemy import func
from sqlalchemy import sql
from neutron.callbacks import events
from neutron.callbacks import registry
from neutron.callbacks import resources
from neutron.db import l3_agentschedulers_db as l3_sch_db
from neutron.db.models import agent as agent_model
from neutron.db.models import l3 as l3_models
from neutron.db.models import l3_attrs
from neutron.db.models import l3agent as rb_model
from neutron.extensions import portbindings
class L3_HA_scheduler_db_mixin(l3_sch_db.AZL3AgentSchedulerDbMixin):
def get_ha_routers_l3_agents_count(self, context):
"""Return a map between HA routers and how many agents every
router is scheduled to.
"""
# Postgres requires every column in the select to be present in
# the group by statement when using an aggregate function.
# One solution is to generate a subquery and join it with the desired
# columns.
binding_model = rb_model.RouterL3AgentBinding
sub_query = (context.session.query(
binding_model.router_id,
func.count(binding_model.router_id).label('count')).
join(l3_attrs.RouterExtraAttributes,
binding_model.router_id ==
l3_attrs.RouterExtraAttributes.router_id).
join(l3_models.Router).
filter(l3_attrs.RouterExtraAttributes.ha == sql.true()).
group_by(binding_model.router_id).subquery())
query = (context.session.query(l3_models.Router, sub_query.c.count).
join(sub_query))
return [(self._make_router_dict(router), agent_count)
for router, agent_count in query]
def get_l3_agents_ordered_by_num_routers(self, context, agent_ids):
if not agent_ids:
return []

View File

@ -23,14 +23,13 @@ from neutron_lib import constants as lib_const
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import log as logging
from oslo_log import versionutils
import six
from sqlalchemy import sql
from neutron._i18n import _LW
from neutron.common import utils
from neutron.db import api as db_api
from neutron.db import l3_hamode_db
from neutron.db.models import l3 as l3_models
from neutron.db.models import l3agent as rb_model
from neutron.extensions import availability_zone as az_ext
from neutron.extensions import l3
@ -46,14 +45,13 @@ class L3Scheduler(object):
def __init__(self):
self.max_ha_agents = cfg.CONF.max_l3_agents_per_router
@abc.abstractmethod
def schedule(self, plugin, context, router_id,
candidates=None, hints=None):
def schedule(self, plugin, context, router_id, candidates=None):
"""Schedule the router to an active L3 agent.
Schedule the router only if it is not already scheduled.
"""
pass
return self._schedule_router(
plugin, context, router_id, candidates=candidates)
def _router_has_binding(self, context, router_id, l3_agent_id):
router_binding_model = rb_model.RouterL3AgentBinding
@ -64,52 +62,6 @@ class L3Scheduler(object):
return query.count() > 0
def _filter_unscheduled_routers(self, plugin, context, routers):
"""Filter from list of routers the ones that are not scheduled."""
unscheduled_routers = []
for router in routers:
l3_agents = plugin.get_l3_agents_hosting_routers(
context, [router['id']])
if l3_agents:
LOG.debug('Router %(router_id)s has already been '
'hosted by L3 agent %(agent_id)s',
{'router_id': router['id'],
'agent_id': l3_agents[0]['id']})
else:
unscheduled_routers.append(router)
return unscheduled_routers
def _get_unscheduled_routers(self, plugin, context):
"""Get routers with no agent binding."""
# TODO(gongysh) consider the disabled agent's router
no_agent_binding = ~sql.exists().where(
l3_models.Router.id ==
rb_model.RouterL3AgentBinding.router_id)
query = context.session.query(
l3_models.Router.id).filter(no_agent_binding)
unscheduled_router_ids = [router_id_[0] for router_id_ in query]
if unscheduled_router_ids:
return plugin.get_routers(
context, filters={'id': unscheduled_router_ids})
return []
def _get_routers_to_schedule(self, plugin, context, router_ids=None):
"""Verify that the routers specified need to be scheduled.
:param context: the context
:param plugin: the core plugin
:param router_ids: the list of routers to be checked for scheduling
:returns: the list of routers to be scheduled
"""
if router_ids is not None:
filters = {'id': router_ids}
routers = plugin.get_routers(context, filters=filters)
result = self._filter_unscheduled_routers(plugin, context, routers)
else:
result = self._get_unscheduled_routers(plugin, context)
return [r for r in result
if plugin.router_supports_scheduling(context, r['id'])]
def _get_routers_can_schedule(self, plugin, context, routers, l3_agent):
"""Get the subset of routers that can be scheduled on the L3 agent."""
ids_to_discard = set()
@ -122,42 +74,59 @@ class L3Scheduler(object):
return [r for r in routers if r['id'] not in ids_to_discard]
def auto_schedule_routers(self, plugin, context, host, router_ids):
"""Schedule non-hosted routers to L3 Agent running on host.
def auto_schedule_routers(self, plugin, context, host, router_ids=None):
"""Schedule under-scheduled routers to L3 Agents.
If router_ids is given, each router in router_ids is scheduled
if it is not scheduled yet. Otherwise all unscheduled routers
are scheduled.
Do not schedule the routers which are hosted already
by active l3 agents.
An under-scheduled router is a router that is either completely
un-scheduled (scheduled to 0 agents), or an HA router that is
under-scheduled (scheduled to less than max_l3_agents configuration
option. The function finds all the under-scheduled routers and
schedules them.
:returns: True if routers have been successfully assigned to host
:param host: if unspecified, under-scheduled routers are scheduled to
all agents (not necessarily from the requesting host). If
specified, under-scheduled routers are scheduled only to
the agent on 'host'.
:param router_ids: currently unused and deprecated.
kept for backward compatibility.
"""
if router_ids is not None:
versionutils.report_deprecated_feature(
LOG,
_LW('Passing router_ids has no effect on L3 agent '
'scheduling. This is deprecated and will be '
'removed in the Queens release.'))
l3_agent = plugin.get_enabled_agent_on_host(
context, lib_const.AGENT_TYPE_L3, host)
if not l3_agent:
return
unscheduled_routers = self._get_routers_to_schedule(
plugin, context, router_ids)
if not unscheduled_routers:
if utils.is_extension_supported(
plugin, lib_const.L3_HA_MODE_EXT_ALIAS):
self._schedule_ha_routers_to_additional_agent(
plugin, context, l3_agent)
return
underscheduled_routers = self._get_underscheduled_routers(
plugin, context)
target_routers = self._get_routers_can_schedule(
plugin, context, unscheduled_routers, l3_agent)
if not target_routers:
LOG.warning(_LW('No routers compatible with L3 agent '
'configuration on host %s'), host)
return
plugin, context, underscheduled_routers, l3_agent)
self._bind_routers(plugin, context, target_routers, l3_agent)
for router in target_routers:
self.schedule(plugin, context, router['id'], candidates=[l3_agent])
def _get_underscheduled_routers(self, plugin, context):
underscheduled_routers = []
max_agents_for_ha = plugin.get_number_of_agents_for_scheduling(context)
for router, count in plugin.get_routers_l3_agents_count(context):
if (count < 1 or
router.get('ha', False) and count < max_agents_for_ha):
# Either the router was un-scheduled (scheduled to 0 agents),
# or it's an HA router and it was under-scheduled (scheduled to
# less than max_agents_for_ha). Either way, it should be added
# to the list of routers we want to handle.
underscheduled_routers.append(router)
return underscheduled_routers
def _get_candidates(self, plugin, context, sync_router):
"""Return L3 agents where a router could be scheduled."""
is_ha = sync_router.get('ha', False)
with context.session.begin(subtransactions=True):
# allow one router is hosted by just
# one enabled l3 agent hosting since active is just a
@ -165,7 +134,7 @@ class L3Scheduler(object):
# active any time
current_l3_agents = plugin.get_l3_agents_hosting_routers(
context, [sync_router['id']], admin_state_up=True)
if current_l3_agents:
if current_l3_agents and not is_ha:
LOG.debug('Router %(router_id)s has already been hosted '
'by L3 agent %(agent_id)s',
{'router_id': sync_router['id'],
@ -343,34 +312,17 @@ class L3Scheduler(object):
"""Return a mapping (router, # agents) matching specified filters."""
return plugin.get_ha_routers_l3_agents_count(context)
def _schedule_ha_routers_to_additional_agent(self, plugin, context, agent):
"""Bind already scheduled routers to the agent.
Retrieve the number of agents per router and check if the router has
to be scheduled on the given agent if max_l3_agents_per_router
is not yet reached.
"""
routers_agents = self.get_ha_routers_l3_agents_counts(plugin, context,
agent)
admin_ctx = context.elevated()
underscheduled_routers = [router for router, agents in routers_agents
if (not self.max_ha_agents or
agents < self.max_ha_agents)]
schedulable_routers = self._get_routers_can_schedule(
plugin, admin_ctx, underscheduled_routers, agent)
for router in schedulable_routers:
if not self._router_has_binding(admin_ctx, router['id'],
agent.id):
self.create_ha_port_and_bind(plugin, admin_ctx,
router['id'],
router['tenant_id'],
agent)
def _filter_scheduled_agents(self, plugin, context, router_id, candidates):
hosting = plugin.get_l3_agents_hosting_routers(context, [router_id])
return list(set(candidates) - set(hosting))
def _bind_ha_router(self, plugin, context, router_id,
tenant_id, candidates):
"""Bind a HA router to agents based on a specific policy."""
candidates = self._filter_scheduled_agents(plugin, context, router_id,
candidates)
chosen_agents = self._choose_router_agents_for_ha(
plugin, context, candidates)
@ -384,11 +336,6 @@ class L3Scheduler(object):
class ChanceScheduler(L3Scheduler):
"""Randomly allocate an L3 agent for a router."""
def schedule(self, plugin, context, router_id,
candidates=None):
return self._schedule_router(
plugin, context, router_id, candidates=candidates)
def _choose_router_agent(self, plugin, context, candidates):
return random.choice(candidates)
@ -400,11 +347,6 @@ class ChanceScheduler(L3Scheduler):
class LeastRoutersScheduler(L3Scheduler):
"""Allocate to an L3 agent with the least number of routers bound."""
def schedule(self, plugin, context, router_id,
candidates=None):
return self._schedule_router(
plugin, context, router_id, candidates=candidates)
def _choose_router_agent(self, plugin, context, candidates):
candidate_ids = [candidate['id'] for candidate in candidates]
chosen_agent = plugin.get_l3_agent_with_min_routers(

View File

@ -46,6 +46,7 @@ from neutron.db import l3_attrs_db
from neutron.db import l3_db
from neutron.db import l3_dvr_db
from neutron.db import l3_dvrscheduler_db
from neutron.db import l3_hamode_db
from neutron.db.models import l3 as l3_models
from neutron.db import models_v2
from neutron.extensions import dns
@ -279,7 +280,8 @@ class TestL3NatIntPlugin(TestL3NatBasePlugin,
# scheduling.
class TestL3NatIntAgentSchedulingPlugin(TestL3NatIntPlugin,
l3_agentschedulers_db.
L3AgentSchedulerDbMixin):
L3AgentSchedulerDbMixin,
l3_hamode_db.L3_HA_NAT_db_mixin):
supported_extension_aliases = ["external-net", "router",
"l3_agent_scheduler"]
@ -316,7 +318,8 @@ class TestL3NatServicePlugin(common_db_mixin.CommonDbMixin,
# plugins that delegate away L3 routing functionality
class TestL3NatAgentSchedulingServicePlugin(TestL3NatServicePlugin,
l3_dvrscheduler_db.
L3_DVRsch_db_mixin):
L3_DVRsch_db_mixin,
l3_hamode_db.L3_HA_NAT_db_mixin):
supported_extension_aliases = ["router", "l3_agent_scheduler"]

View File

@ -132,118 +132,12 @@ class L3SchedulerBaseTestCase(base.BaseTestCase):
self.scheduler = FakeL3Scheduler()
self.plugin = mock.Mock()
def test_auto_schedule_routers(self):
self.plugin.get_enabled_agent_on_host.return_value = [mock.ANY]
with mock.patch.object(self.scheduler,
'_get_routers_to_schedule') as gs,\
mock.patch.object(self.scheduler,
'_get_routers_can_schedule',) as gr,\
mock.patch.object(self.scheduler,
'_bind_routers') as gb:
self.scheduler.auto_schedule_routers(
self.plugin, mock.ANY, mock.ANY, mock.ANY)
self.assertTrue(self.plugin.get_enabled_agent_on_host.called)
self.assertTrue(gs.called)
self.assertTrue(gr.called)
self.assertTrue(gb.called)
def test_auto_schedule_routers_no_agents(self):
self.plugin.get_enabled_agent_on_host.return_value = None
result = self.scheduler.auto_schedule_routers(
self.plugin, mock.ANY, mock.ANY, mock.ANY)
self.assertTrue(self.plugin.get_enabled_agent_on_host.called)
self.assertFalse(result)
def test_auto_schedule_routers_no_unscheduled_routers(self):
type(self.plugin).supported_extension_aliases = (
mock.PropertyMock(return_value=[]))
with mock.patch.object(self.scheduler,
'_get_routers_to_schedule') as mock_routers:
mock_routers.return_value = []
result = self.scheduler.auto_schedule_routers(
self.plugin, mock.ANY, mock.ANY, mock.ANY)
self.assertTrue(self.plugin.get_enabled_agent_on_host.called)
self.assertFalse(result)
def test_auto_schedule_routers_no_target_routers(self):
self.plugin.get_enabled_agent_on_host.return_value = [mock.ANY]
with mock.patch.object(
self.scheduler,
'_get_routers_to_schedule') as mock_unscheduled_routers,\
mock.patch.object(
self.scheduler,
'_get_routers_can_schedule') as mock_target_routers:
mock_unscheduled_routers.return_value = mock.ANY
mock_target_routers.return_value = []
result = self.scheduler.auto_schedule_routers(
self.plugin, mock.ANY, mock.ANY, mock.ANY)
self.assertTrue(self.plugin.get_enabled_agent_on_host.called)
self.assertFalse(result)
def test__get_routers_to_schedule_with_router_ids(self):
router_ids = ['foo_router_1', 'foo_router_2']
expected_routers = [
{'id': 'foo_router1'}, {'id': 'foo_router_2'}
]
self.plugin.get_routers.return_value = expected_routers
with mock.patch.object(self.scheduler,
'_filter_unscheduled_routers') as mock_filter:
mock_filter.return_value = expected_routers
unscheduled_routers = self.scheduler._get_routers_to_schedule(
self.plugin, mock.ANY, router_ids)
mock_filter.assert_called_once_with(
mock.ANY, self.plugin, expected_routers)
self.assertEqual(expected_routers, unscheduled_routers)
def test__get_routers_to_schedule_without_router_ids(self):
expected_routers = [
{'id': 'foo_router1'}, {'id': 'foo_router_2'}
]
with mock.patch.object(self.scheduler,
'_get_unscheduled_routers') as mock_get:
mock_get.return_value = expected_routers
unscheduled_routers = self.scheduler._get_routers_to_schedule(
self.plugin, mock.ANY)
mock_get.assert_called_once_with(mock.ANY, self.plugin)
self.assertEqual(expected_routers, unscheduled_routers)
def test__get_routers_to_schedule_excludes_unsupported(self):
routers = [
{'id': 'router_1'}, {'id': 'router_2'}, {'id': 'router_3'}
]
expected_routers = [{'id': 'router_2'}]
# exclude everything except for 2
self.plugin.router_supports_scheduling = lambda c, rid: rid[-1] == '2'
with mock.patch.object(self.scheduler,
'_get_unscheduled_routers') as mock_get:
mock_get.return_value = routers
unscheduled_routers = self.scheduler._get_routers_to_schedule(
self.plugin, mock.ANY)
mock_get.assert_called_once_with(mock.ANY, self.plugin)
self.assertEqual(expected_routers, unscheduled_routers)
def _test__get_routers_can_schedule(self, routers, agent, target_routers):
self.plugin.get_l3_agent_candidates.return_value = agent
result = self.scheduler._get_routers_can_schedule(
self.plugin, mock.ANY, routers, mock.ANY)
self.assertEqual(target_routers, result)
def _test__filter_unscheduled_routers(self, routers, agents, expected):
self.plugin.get_l3_agents_hosting_routers.return_value = agents
unscheduled_routers = self.scheduler._filter_unscheduled_routers(
self.plugin, mock.ANY, routers)
self.assertEqual(expected, unscheduled_routers)
def test__filter_unscheduled_routers_already_scheduled(self):
self._test__filter_unscheduled_routers(
[{'id': 'foo_router1'}, {'id': 'foo_router_2'}],
[{'id': 'foo_agent_id'}], [])
def test__filter_unscheduled_routers_non_scheduled(self):
self._test__filter_unscheduled_routers(
[{'id': 'foo_router1'}, {'id': 'foo_router_2'}],
None, [{'id': 'foo_router1'}, {'id': 'foo_router_2'}])
def test__get_routers_can_schedule_with_compat_agent(self):
routers = [{'id': 'foo_router'}]
self._test__get_routers_can_schedule(routers, mock.ANY, routers)
@ -1592,17 +1486,18 @@ class L3_HA_scheduler_db_mixinTestCase(L3HATestCaseMixin):
self.agent4 = helpers.register_l3_agent(host='host_4')
self.agent_id4 = self.agent4.id
def test_get_ha_routers_l3_agents_count(self):
def test_get_routers_l3_agents_count(self):
router1 = self._create_ha_router()
cfg.CONF.set_override('max_l3_agents_per_router', 2)
router2 = self._create_ha_router()
router3 = self._create_ha_router(ha=False)
result = self.plugin.get_ha_routers_l3_agents_count(self.adminContext)
result = self.plugin.get_routers_l3_agents_count(self.adminContext)
self.assertEqual(2, len(result))
self.assertEqual(3, len(result))
check_result = [(router['id'], agents) for router, agents in result]
self.assertIn((router1['id'], 4), check_result)
self.assertIn((router2['id'], 4), check_result)
self.assertNotIn((router3['id'], mock.ANY), check_result)
self.assertIn((router2['id'], 2), check_result)
self.assertIn((router3['id'], 0), check_result)
def test_get_ordered_l3_agents_by_num_routers(self):
# Mock scheduling so that the test can control it explicitly