ovn-l3 scheduler: calculate load of chassis per priority

Previously we calculated the "load" of a chassis based on the highest
priority present on each of the chassis. This can lead to suboptimal
results in the following situation:
* you have gateway chassis: hv1, hv2, hv3
* you have routers:
   * g1: with priority 3 on hv1, priority 2 on hv2, priority 1 on hv3
   * g2: with priority 3 on hv1, priority 2 on hv2, priority 1 on hv3
   * g3: with priority 3 on hv3, priority 2 on hv2, priority 1 on hv1
   * g4: with priority 3 on hv3, priority 2 on hv2, priority 1 on hv1

When creating a new router, the previous algorithm would have placed
prio 3 of it on either hv1 or hv3, since their count of highest
priorities (2 of prio 3) is lower than the count of the highest priority
of hv2 (4 of prio 2). So it might have looked like:
* g5: with priority 3 on hv3, priority 2 on hv1, priority 1 on hv2
(This case has been implemented as `test_least_loaded_chassis_per_priority2`).

However, this is actually an undesired result. In OVN the gateway chassis
with the highest priority actually hosts the router and processes all of
its external traffic. This means it is highly important that the highest
priority is well balanced.

To achieve this, we no longer blindly use the count of routers of the
highest priority per chassis; instead we only count the routers of the
priority we are currently searching a chassis for. This ensures that in
the above case we would have picked hv2 for priority 3, since it has no
actually active router (i.e. no priority 3 router) running on it.

The algorithm implemented now is based upon the assumption that the
number of priorities scheduled per router is equal across all routers.
This means it will perform suboptimally if some physical network is
available on 5 gateway chassis, while another one is only available on
2. (It is, however, unclear whether the previous implementation would
have been better there.)

In this commit we also adapt the test cases in test_l3_ovn_scheduler to
match this assumption. Previously the distribution data used for testing
had been unrealistic, as it mostly scheduled only one gateway chassis for
each router.

It also fixes the previously broken priority calculation in the
test case, which would just assign prio 0 to all gateways.

Partial-Bug: #2023993
Change-Id: If2afcd546a1da9964704bcebbfa39d8348e14fe8
This commit is contained in:
Felix Huettner 2023-08-31 16:05:38 +02:00
parent b28bf2d3a1
commit b5f5f3def3
3 changed files with 117 additions and 61 deletions

View File

@ -466,12 +466,16 @@ class OvsdbNbOvnIdl(nb_impl_idl.OvnNbApiIdlImpl, Backend):
return cmd.DelStaticRouteCommand(self, lrouter, ip_prefix, nexthop,
if_exists)
def _get_logical_router_port_gateway_chassis(self, lrp):
def _get_logical_router_port_gateway_chassis(self, lrp, priorities=None):
"""Get the list of chassis hosting this gateway port.
@param lrp: logical router port
@type lrp: Logical_Router_Port row
@return: List of tuples (chassis_name, priority) sorted by priority
@param priorities: a list of gateway chassis priorities to search for
@type priorities: list of int
@return: List of tuples (chassis_name, priority) sorted by priority. If
``priorities`` is set then only chassis matching of of these
priorities are returned.
"""
# Try retrieving gateway_chassis with new schema. If new schema is not
# supported or user is using old schema, then use old schema for
@ -479,6 +483,8 @@ class OvsdbNbOvnIdl(nb_impl_idl.OvnNbApiIdlImpl, Backend):
chassis = []
if self._tables.get('Gateway_Chassis'):
for gwc in lrp.gateway_chassis:
if priorities is not None and gwc.priority not in priorities:
continue
chassis.append((gwc.chassis_name, gwc.priority))
else:
rc = lrp.options.get(ovn_const.OVN_GATEWAY_CHASSIS_KEY)
@ -488,14 +494,16 @@ class OvsdbNbOvnIdl(nb_impl_idl.OvnNbApiIdlImpl, Backend):
return sorted(chassis, reverse=True, key=lambda x: x[1])
def get_all_chassis_gateway_bindings(self,
chassis_candidate_list=None):
chassis_candidate_list=None,
priorities=None):
chassis_bindings = {}
for chassis_name in chassis_candidate_list or []:
chassis_bindings.setdefault(chassis_name, [])
for lrp in self._tables['Logical_Router_Port'].rows.values():
if not lrp.name.startswith('lrp-'):
continue
chassis = self._get_logical_router_port_gateway_chassis(lrp)
chassis = self._get_logical_router_port_gateway_chassis(
lrp, priorities=priorities)
for chassis_name, prio in chassis:
if (not chassis_candidate_list or
chassis_name in chassis_candidate_list):

View File

@ -35,7 +35,8 @@ class OVNGatewayScheduler(object, metaclass=abc.ABCMeta):
pass
@abc.abstractmethod
def select(self, nb_idl, gateway_name, candidates=None):
def select(self, nb_idl, gateway_name, candidates=None,
existing_chassis=None):
"""Schedule the gateway port of a router to an OVN chassis.
Schedule the gateway router port only if it is not already
@ -73,12 +74,16 @@ class OVNGatewayScheduler(object, metaclass=abc.ABCMeta):
LOG.warning('Gateway %s was not scheduled on any chassis, no '
'candidates are available', gateway_name)
return [ovn_const.OVN_GATEWAY_INVALID_CHASSIS]
chassis_count = ovn_const.MAX_GW_CHASSIS - len(existing_chassis)
chassis_count = min(
ovn_const.MAX_GW_CHASSIS - len(existing_chassis),
len(candidates)
)
# The actual binding of the gateway to a chassis via the options
# column or gateway_chassis column in the OVN_Northbound is done
# by the caller
chassis = self._select_gateway_chassis(
nb_idl, candidates)[:chassis_count]
nb_idl, candidates, 1, chassis_count
)[:chassis_count]
# priority of existing chassis is higher than candidates
chassis = existing_chassis + chassis
@ -87,8 +92,14 @@ class OVNGatewayScheduler(object, metaclass=abc.ABCMeta):
return chassis
@abc.abstractmethod
def _select_gateway_chassis(self, nb_idl, candidates):
"""Choose a chassis from candidates based on a specific policy."""
def _select_gateway_chassis(self, nb_idl, candidates,
priority_min, priority_max):
"""Choose a chassis from candidates based on a specific policy.
Returns a list of chassis to use for scheduling. The value at
``ret[0]`` will be used for the chassis with ``priority_max``, the
value at ``ret[-1]`` will be used for the chassis with ``priority_min``
"""
class OVNGatewayChanceScheduler(OVNGatewayScheduler):
@ -99,7 +110,8 @@ class OVNGatewayChanceScheduler(OVNGatewayScheduler):
return self._schedule_gateway(nb_idl, gateway_name,
candidates, existing_chassis)
def _select_gateway_chassis(self, nb_idl, candidates):
def _select_gateway_chassis(self, nb_idl, candidates,
priority_min, priority_max):
candidates = copy.deepcopy(candidates)
random.shuffle(candidates)
return candidates
@ -113,31 +125,31 @@ class OVNGatewayLeastLoadedScheduler(OVNGatewayScheduler):
return self._schedule_gateway(nb_idl, gateway_name,
candidates, existing_chassis)
@staticmethod
def _get_chassis_load_by_prios(chassis_info):
"""Retrieve the amount of ports by priorities hosted in the chassis.
@param chassis_info: list of (port, prio) hosted by this chassis
@type chassis_info: []
@return: A list of (prio, number_of_ports) tuples.
def _select_gateway_chassis(self, nb_idl, candidates,
priority_min, priority_max):
"""Returns a lit of chassis from candidates ordered by priority
(highest first). Each chassis in every priority will be selected, as it
is the least loaded for that specific priority.
"""
chassis_load = {}
for lrp, prio in chassis_info:
chassis_load[prio] = chassis_load.get(prio, 0) + 1
return chassis_load.items()
@staticmethod
def _get_chassis_load(chassis):
chassis_ports_prios = chassis[1]
return sorted(
OVNGatewayLeastLoadedScheduler._get_chassis_load_by_prios(
chassis_ports_prios), reverse=True)
def _select_gateway_chassis(self, nb_idl, candidates):
chassis_bindings = nb_idl.get_all_chassis_gateway_bindings(candidates)
return [chassis for chassis, load in
sorted(chassis_bindings.items(),
key=OVNGatewayLeastLoadedScheduler._get_chassis_load)]
selected_chassis = []
priorities = list(range(priority_max, priority_min - 1, -1))
all_chassis_bindings = nb_idl.get_all_chassis_gateway_bindings(
candidates, priorities=priorities)
for priority in priorities:
chassis_load = {}
for chassis, lrps in all_chassis_bindings.items():
if chassis in selected_chassis:
continue
chassis_load[chassis] = len(
[lrp for lrp, prio in lrps if prio == priority])
if len(chassis_load) == 0:
break
leastload = min(chassis_load.values())
chassis = random.choice(
[chassis for chassis, load in chassis_load.items()
if load == leastload])
selected_chassis.append(chassis)
return selected_chassis
OVN_SCHEDULER_STR_TO_CLASS = {

View File

@ -46,25 +46,38 @@ class TestOVNGatewayScheduler(base.BaseTestCase):
'Gateways': {
'g1': [ovn_const.OVN_GATEWAY_INVALID_CHASSIS]}},
'Multiple1': {'Chassis': ['hv1', 'hv2', 'hv3', 'hv4', 'hv5'],
'Gateways': {'g1': ['hv1', 'hv2', 'hv3', 'hv4'],
'g2': ['hv1', 'hv2', 'hv3'],
'g3': ['hv1', 'hv2'],
'g4': ['hv1']}},
'Gateways': {
'g1': ['hv1', 'hv2', 'hv4', 'hv3', 'hv5'],
'g2': ['hv2', 'hv3', 'hv5', 'hv1', 'hv4'],
'g3': ['hv3', 'hv5', 'hv1', 'hv4', 'hv2'],
'g4': ['hv4', 'hv1', 'hv2', 'hv5', 'hv3']}},
'Multiple2': {'Chassis': ['hv1', 'hv2', 'hv3'],
'Gateways': {'g1': ['hv1'],
'g2': ['hv1'],
'g3': ['hv1']}},
'Gateways': {'g1': ['hv1', 'hv2', 'hv3'],
'g2': ['hv2', 'hv1', 'hv3'],
'g3': ['hv2', 'hv1', 'hv3']}},
'Multiple3': {'Chassis': ['hv1', 'hv2', 'hv3'],
'Gateways': {'g1': ['hv3'],
'g2': ['hv2'],
'g3': ['hv2']}},
'Gateways': {'g1': ['hv3', 'hv2', 'hv1'],
'g2': ['hv2', 'hv1', 'hv3'],
'g3': ['hv2', 'hv1', 'hv3']}},
'Multiple4': {'Chassis': ['hv1', 'hv2'],
'Gateways': {'g1': ['hv1'],
'Gateways': {'g1': ['hv1', 'hv2'],
'g2': ['hv1'],
'g3': ['hv1'],
'g4': ['hv1'],
'g5': ['hv1'],
'g6': ['hv1']}}}
'g6': ['hv1']}},
'Multiple5': {'Chassis': ['hv1', 'hv2', 'hv3', 'hv4', 'hv5'],
'Gateways': {
'g1': ['hv1', 'hv2', 'hv3', 'hv4', 'hv5'],
'g2': ['hv3', 'hv2', 'hv4', 'hv5', 'hv1'],
'g3': ['hv4', 'hv5', 'hv1', 'hv2', 'hv3'],
'g4': ['hv5', 'hv1', 'hv2', 'hv3', 'hv4']}},
'Multiple6': {'Chassis': ['hv1', 'hv2', 'hv3'],
'Gateways': {
'g1': ['hv1', 'hv2', 'hv3'],
'g2': ['hv1', 'hv2', 'hv3'],
'g3': ['hv3', 'hv2', 'hv1'],
'g4': ['hv3', 'hv2', 'hv1']}}}
# Determine the chassis to gateway list bindings
for details in self.fake_chassis_gateway_mappings.values():
@ -73,9 +86,11 @@ class TestOVNGatewayScheduler(base.BaseTestCase):
for chassis in details['Chassis']:
details['Chassis_Bindings'].setdefault(chassis, [])
for gw, chassis_list in details['Gateways'].items():
for chassis in chassis_list:
max_prio = len(chassis_list)
for idx, chassis in enumerate(chassis_list):
prio = max_prio - idx
if chassis in details['Chassis_Bindings']:
details['Chassis_Bindings'][chassis].append((gw, 0))
details['Chassis_Bindings'][chassis].append((gw, prio))
def select(self, chassis_gateway_mapping, gateway_name, candidates=None):
nb_idl = FakeOVNGatewaySchedulerNbOvnIdl(chassis_gateway_mapping,
@ -222,22 +237,43 @@ class OVNGatewayLeastLoadedScheduler(TestOVNGatewayScheduler):
# least loaded chassis will be in the front of the list
self.assertEqual(['hv2', 'hv1'], chassis)
def test_least_loaded_chassis_per_priority(self):
mapping = self.fake_chassis_gateway_mappings['Multiple5']
gateway_name = self.new_gateway_name
chassis = self.select(mapping, gateway_name,
candidates=mapping['Chassis'])
# we should now have the following hv's per priority:
# p5: hv2 (since it currently does not have p5 ports)
# p4: hv3 or hv4 (since both currently do not have p4 ports)
# p3: hv5 (since it currently does not have p3 ports)
# p2: hv1 (since it currently does not have p2 ports)
# p1: hv3 or hv4 (since they only have one p1 port;
# cant be hv2 since it was already selected)
self.assertEqual(chassis[0], 'hv2')
self.assertIn(chassis[1], ['hv3', 'hv4'])
self.assertEqual(chassis[2], 'hv5')
self.assertEqual(chassis[3], 'hv1')
self.assertIn(chassis[4], ['hv3', 'hv4'])
self.assertNotEqual(chassis[1], chassis[4])
def test_least_loaded_chassis_per_priority2(self):
mapping = self.fake_chassis_gateway_mappings['Multiple6']
gateway_name = self.new_gateway_name
chassis = self.select(mapping, gateway_name,
candidates=mapping['Chassis'])
# we should now have the following hv's per priority:
# p3: hv2 (since it currently does not have p3 ports)
# p2: hv1 or hv3 (since both currently do not have p2 ports)
# p1: hv1 or hv3 (since they only have two p1 ports;
# cant be hv2 since it was already selected)
self.assertEqual(chassis[0], 'hv2')
self.assertIn(chassis[1], ['hv1', 'hv3'])
self.assertIn(chassis[2], ['hv1', 'hv3'])
self.assertNotEqual(chassis[1], chassis[2])
def test_existing_chassis_available_for_existing_gateway(self):
mapping = self.fake_chassis_gateway_mappings['Multiple1']
gateway_name = random.choice(list(mapping['Gateways'].keys()))
chassis = self.select(mapping, gateway_name,
candidates=mapping['Chassis'])
self.assertEqual(ovn_const.MAX_GW_CHASSIS, len(chassis))
def test__get_chassis_load_by_prios_several_ports(self):
# Adding 5 ports of prio 1 and 5 ports of prio 2
chassis_info = []
for i in range(1, 6):
chassis_info.append(('lrp', 1))
chassis_info.append(('lrp', 2))
actual = self.l3_scheduler._get_chassis_load_by_prios(chassis_info)
expected = {1: 5, 2: 5}
self.assertCountEqual(expected.items(), actual)
def test__get_chassis_load_by_prios_no_ports(self):
self.assertFalse(self.l3_scheduler._get_chassis_load_by_prios([]))