ovn-l3: reschedule lower priorities
If a gateway chassis is removed, we previously only plugged the hole it left in the priorities of the LRPs. This can lead to bad choices, since we are bound by all other currently used chassis. By also allowing the lower priorities to be rescheduled, we get significantly more freedom in choosing the most appropriate chassis and avoid overloading an individual one. As an example from the new testcase: previously we would have had all prio 2 schedules on chassis3, but with this change they are now also distributed to chassis4. Partial-Bug: #2023993 Change-Id: I786ff6c0c4d3403b79819df95f9b1d6ac5e8675f
This commit is contained in:
parent
b5f5f3def3
commit
3d5d82a197
@ -414,18 +414,44 @@ class OVNL3RouterPlugin(service_base.ServicePluginBase,
|
||||
unhosted_gateways = self._nb_ovn.get_unhosted_gateways(
|
||||
port_physnet_dict, chassis_with_physnets,
|
||||
all_gw_chassis, chassis_with_azs)
|
||||
for g_name in unhosted_gateways:
|
||||
|
||||
self._reschedule_lrps(unhosted_gateways)
|
||||
|
||||
def _reschedule_lrps(self, lrps):
|
||||
# GW ports and their physnets.
|
||||
port_physnet_dict = self._get_gateway_port_physnet_mapping()
|
||||
# All chassis with physnets configured.
|
||||
chassis_with_physnets = self._sb_ovn.get_chassis_and_physnets()
|
||||
# All chassis with enable_as_gw_chassis set
|
||||
all_gw_chassis = self._sb_ovn.get_gateway_chassis_from_cms_options()
|
||||
chassis_with_azs = self._sb_ovn.get_chassis_and_azs()
|
||||
|
||||
for g_name in lrps:
|
||||
physnet = port_physnet_dict.get(g_name[len(ovn_const.LRP_PREFIX):])
|
||||
# Remove any invalid gateway chassis from the list, otherwise
|
||||
# we can have a situation where all existing_chassis are invalid
|
||||
existing_chassis = self._nb_ovn.get_gateway_chassis_binding(g_name)
|
||||
primary = existing_chassis[0] if existing_chassis else None
|
||||
az_hints = self._nb_ovn.get_gateway_chassis_az_hints(g_name)
|
||||
existing_chassis = self.scheduler.filter_existing_chassis(
|
||||
nb_idl=self._nb_ovn, gw_chassis=all_gw_chassis,
|
||||
physnet=physnet, chassis_physnets=chassis_with_physnets,
|
||||
existing_chassis=existing_chassis, az_hints=az_hints,
|
||||
chassis_with_azs=chassis_with_azs)
|
||||
filtered_existing_chassis = \
|
||||
self.scheduler.filter_existing_chassis(
|
||||
nb_idl=self._nb_ovn, gw_chassis=all_gw_chassis,
|
||||
physnet=physnet,
|
||||
chassis_physnets=chassis_with_physnets,
|
||||
existing_chassis=existing_chassis, az_hints=az_hints,
|
||||
chassis_with_azs=chassis_with_azs)
|
||||
if existing_chassis != filtered_existing_chassis:
|
||||
first_diff = None
|
||||
for i in range(len(filtered_existing_chassis)):
|
||||
if existing_chassis[i] != filtered_existing_chassis[i]:
|
||||
first_diff = i
|
||||
break
|
||||
if first_diff is not None:
|
||||
LOG.debug(
|
||||
"A chassis for this gateway has been filtered. "
|
||||
"Rebalancing priorities %s and lower", first_diff)
|
||||
filtered_existing_chassis = filtered_existing_chassis[
|
||||
:max(first_diff, 1)]
|
||||
|
||||
candidates = self._ovn_client.get_candidates_for_scheduling(
|
||||
physnet, cms=all_gw_chassis,
|
||||
@ -433,7 +459,7 @@ class OVNL3RouterPlugin(service_base.ServicePluginBase,
|
||||
availability_zone_hints=az_hints)
|
||||
chassis = self.scheduler.select(
|
||||
self._nb_ovn, g_name, candidates=candidates,
|
||||
existing_chassis=existing_chassis)
|
||||
existing_chassis=filtered_existing_chassis)
|
||||
if primary and primary != chassis[0]:
|
||||
if primary not in chassis:
|
||||
LOG.debug("Primary gateway chassis %(old)s "
|
||||
|
@ -1651,6 +1651,69 @@ class TestOVNL3RouterPlugin(test_mech_driver.Ml2PluginV2TestCase):
|
||||
self.nb_idl().get_unhosted_gateways.assert_called_once_with(
|
||||
{'foo-1': 'physnet1'}, mock.ANY, mock.ANY, mock.ANY)
|
||||
|
||||
@mock.patch('neutron.plugins.ml2.drivers.ovn.mech_driver.mech_driver.'
|
||||
'OVNMechanismDriver.list_availability_zones', lambda *_: [])
|
||||
@mock.patch('neutron.services.ovn_l3.plugin.OVNL3RouterPlugin.'
|
||||
'_get_gateway_port_physnet_mapping')
|
||||
def test_schedule_unhosted_gateways_rebalances_lower_prios(self, get_gppm):
|
||||
unhosted_gws = ['lrp-foo-1', 'lrp-foo-2', 'lrp-foo-3']
|
||||
get_gppm.return_value = {k[len(ovn_const.LRP_PREFIX):]: 'physnet1'
|
||||
for k in unhosted_gws}
|
||||
# we skip chassis2 here since we assume it has been removed
|
||||
chassis_mappings = {
|
||||
'chassis1': ['physnet1'],
|
||||
'chassis3': ['physnet1'],
|
||||
'chassis4': ['physnet1'],
|
||||
}
|
||||
chassis = ['chassis1', 'chassis3', 'chassis4']
|
||||
self.sb_idl().get_chassis_and_physnets.return_value = (
|
||||
chassis_mappings)
|
||||
self.sb_idl().get_gateway_chassis_from_cms_options.return_value = (
|
||||
chassis)
|
||||
self.nb_idl().get_unhosted_gateways.return_value = unhosted_gws
|
||||
self.mock_candidates.return_value = chassis
|
||||
# all ports have 4 chassis (including chassis2 that will be removed)
|
||||
# the ports are not perfectly balanced (but this is realistic with a
|
||||
# few router creations and deletions)
|
||||
existing_port_bindings = [
|
||||
['chassis1', 'chassis2', 'chassis3', 'chassis4'],
|
||||
['chassis2', 'chassis4', 'chassis3', 'chassis1'],
|
||||
['chassis4', 'chassis3', 'chassis1', 'chassis2']]
|
||||
self.nb_idl().get_gateway_chassis_binding.side_effect = (
|
||||
existing_port_bindings)
|
||||
# for 1. port reschedule all besides the first
|
||||
# for 2. port reschedule all besides the new first (chassis 4)
|
||||
# for 3. port keep all and drop the last
|
||||
self.mock_schedule.side_effect = [
|
||||
['chassis1', 'chassis4', 'chassis3'],
|
||||
['chassis4', 'chassis3', 'chassis1'],
|
||||
['chassis4', 'chassis3', 'chassis1']]
|
||||
|
||||
self.l3_inst.schedule_unhosted_gateways()
|
||||
|
||||
self.mock_candidates.assert_has_calls([
|
||||
mock.call(mock.ANY,
|
||||
chassis_physnets=chassis_mappings,
|
||||
cms=chassis, availability_zone_hints=[])] * 3)
|
||||
self.mock_schedule.assert_has_calls([
|
||||
mock.call(self.nb_idl(), 'lrp-foo-1',
|
||||
['chassis1', 'chassis3', 'chassis4'],
|
||||
['chassis1']),
|
||||
mock.call(self.nb_idl(), 'lrp-foo-2',
|
||||
['chassis1', 'chassis3', 'chassis4'],
|
||||
['chassis4']),
|
||||
mock.call(self.nb_idl(), 'lrp-foo-3',
|
||||
['chassis1', 'chassis3', 'chassis4'],
|
||||
['chassis4', 'chassis3', 'chassis1'])])
|
||||
# make sure that the primary chassis stays untouched
|
||||
self.nb_idl().update_lrouter_port.assert_has_calls([
|
||||
mock.call('lrp-foo-1',
|
||||
gateway_chassis=['chassis1', 'chassis4', 'chassis3']),
|
||||
mock.call('lrp-foo-2',
|
||||
gateway_chassis=['chassis4', 'chassis3', 'chassis1']),
|
||||
mock.call('lrp-foo-3',
|
||||
gateway_chassis=['chassis4', 'chassis3', 'chassis1'])])
|
||||
|
||||
@mock.patch('neutron.plugins.ml2.plugin.Ml2Plugin.get_network')
|
||||
@mock.patch('neutron.plugins.ml2.plugin.Ml2Plugin.get_networks')
|
||||
@mock.patch('neutron.plugins.ml2.drivers.ovn.mech_driver.ovsdb.'
|
||||
|
@ -0,0 +1,6 @@
|
||||
---
|
||||
other:
|
||||
- |
|
||||
The OVN L3 scheduler will now update lower priorities of existing LRPs in
|
||||
case of a chassis change. This can create increased load on OVN during
|
||||
chassis shutdown, but improves the load distribution of LRPs.
|
Loading…
Reference in New Issue
Block a user