Lower spacing time of the OVN maintenance tasks which should be run once
Some of the OVN maintenance tasks are expected to be run just once and then they raise periodic.NeverAgain() to not be run anymore. Those tasks also require to have acquried ovn db lock so that only one of the maintenance workers really runs them. All those tasks had set 600 seconds as a spacing time so they were run every 600 seconds. This works fine usually but that may cause small issue in the environments were Neutron is run in POD as k8s/openshift application. In such case, when e.g. configuration of neutron is updated, it may happen that first new POD with Neutron is spawned and only once it is already running, k8s will stop old POD. Because of that maintenance worker running in the new neutron-server POD will not acquire lock on the OVN DB (old POD still holds the lock) and will not run all those maintenance tasks immediately. After old POD will be terminated, one of the new PODs will at some point acquire that lock and then will run all those maintenance tasks but this would cause 600 seconds delay in running them. To avoid such long waiting time to run those maintenance tasks, this patch lowers its spacing time from 600 to just 5 seconds. Additionally maintenance tasks which are supposed to be run only once and only by the maintenance worker which has acquired ovn db lock will now be stopped (periodic.NeverAgain will be raised) after 100 attempts of run. This will avoid running them every 5 seconds forever on the workers which don't acquire lock on the OVN DB at all. Closes-bug: #2074209 Change-Id: Iabb4bb427588c1a5da27a5d313f75b5bd23805b2
This commit is contained in:
parent
0488bdafa1
commit
04c217bcd0
neutron
common/ovn
plugins/ml2/drivers/ovn/mech_driver/ovsdb
tests/unit/plugins/ml2/drivers/ovn/mech_driver/ovsdb
@ -275,6 +275,8 @@ _TYPES_PRIORITY_ORDER = (
|
||||
TYPE_SECURITY_GROUP_RULES)
|
||||
|
||||
DB_CONSISTENCY_CHECK_INTERVAL = 300 # 5 minutes
|
||||
MAINTENANCE_TASK_RETRY_LIMIT = 100 # times
|
||||
MAINTENANCE_ONE_RUN_TASK_SPACING = 5 # seconds
|
||||
|
||||
# The order in which the resources should be created or updated by the
|
||||
# maintenance task: Root ones first and leafs at the end.
|
||||
|
@ -56,14 +56,28 @@ INCONSISTENCY_TYPE_CREATE_UPDATE = 'create/update'
|
||||
INCONSISTENCY_TYPE_DELETE = 'delete'
|
||||
|
||||
|
||||
def has_lock_periodic(*args, **kwargs):
|
||||
def has_lock_periodic(*args, periodic_run_limit=0, **kwargs):
|
||||
def wrapper(f):
|
||||
_retries = 0
|
||||
|
||||
@functools.wraps(f)
|
||||
@periodics.periodic(*args, **kwargs)
|
||||
def decorator(self, *args, **kwargs):
|
||||
# This periodic task is included in DBInconsistenciesPeriodics
|
||||
# since it uses the lock to ensure only one worker is executing
|
||||
# additonally, if periodic_run_limit parameter with value > 0 is
|
||||
# provided and lock is not acquired for periodic_run_limit
|
||||
# times, task will not be run anymore by this maintenance worker
|
||||
nonlocal _retries
|
||||
if not self.has_lock:
|
||||
if periodic_run_limit > 0:
|
||||
if _retries >= periodic_run_limit:
|
||||
LOG.debug("Have not been able to acquire lock to run "
|
||||
"task '%s' after %s tries, limit reached. "
|
||||
"No more attempts will be made.",
|
||||
f, _retries)
|
||||
raise periodics.NeverAgain()
|
||||
_retries += 1
|
||||
return
|
||||
return f(self, *args, **kwargs)
|
||||
return decorator
|
||||
@ -444,7 +458,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_global_dhcp_opts(self):
|
||||
if (not ovn_conf.get_global_dhcpv4_opts() and
|
||||
not ovn_conf.get_global_dhcpv6_opts()):
|
||||
@ -474,7 +491,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_for_igmp_snoop_support(self):
|
||||
snooping_conf = ovs_conf.get_igmp_snooping_enabled()
|
||||
flood_conf = ovs_conf.get_igmp_flood_unregistered()
|
||||
@ -504,7 +524,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
# TODO(czesla): Remove this in the A+4 cycle
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_port_has_address_scope(self):
|
||||
ports = self._nb_idl.db_find_rows(
|
||||
"Logical_Switch_Port", ("type", "!=", ovn_const.LSP_TYPE_LOCALNET)
|
||||
@ -539,7 +562,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_for_ha_chassis_group(self):
|
||||
# If external ports is not supported stop running
|
||||
# this periodic task
|
||||
@ -567,7 +593,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
# TODO(lucasagomes): Remove this in the B+3 cycle
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_for_mcast_flood_reports(self):
|
||||
mcast_flood_conf = ovs_conf.get_igmp_flood()
|
||||
mcast_flood_reports_conf = ovs_conf.get_igmp_flood_reports()
|
||||
@ -625,7 +654,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_localnet_port_has_learn_fdb(self):
|
||||
ports = self._nb_idl.db_find_rows(
|
||||
"Logical_Switch_Port", ("type", "=", ovn_const.LSP_TYPE_LOCALNET)
|
||||
@ -652,7 +684,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_redirect_type_router_gateway_ports(self):
|
||||
"""Check OVN router gateway ports
|
||||
Check for the option "redirect-type=bridged" value for
|
||||
@ -714,7 +749,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_vlan_distributed_ports(self):
|
||||
"""Check VLAN distributed ports
|
||||
Check for the option "reside-on-redirect-chassis" value for
|
||||
@ -752,7 +790,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_fdb_aging_settings(self):
|
||||
"""Check FDB aging settings
|
||||
Ensure FDB aging settings are enforced.
|
||||
@ -788,7 +829,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def update_mac_aging_settings(self):
|
||||
"""Ensure that MAC_Binding aging options are set"""
|
||||
with self._nb_idl.transaction(check_error=True) as txn:
|
||||
@ -804,7 +848,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
# "external_ids:OVN_GW_PORT_EXT_ID_KEY" from to each router.
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def remove_gw_ext_ids_from_logical_router(self):
|
||||
"""Remove `gw_port_id` and `gw_network_id` external_ids from LRs"""
|
||||
cmds = []
|
||||
@ -832,7 +879,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
# A static spacing value is used here, but this method will only run
|
||||
# once per lock due to the use of periodics.NeverAgain().
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_baremetal_ports_dhcp_options(self):
|
||||
"""Update baremetal ports DHCP options
|
||||
|
||||
@ -881,7 +931,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
# TODO(ralonsoh): Remove this in the Antelope+4 cycle
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def create_router_extra_attributes_registers(self):
|
||||
"""Create missing ``RouterExtraAttributes`` registers.
|
||||
|
||||
@ -902,7 +955,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
# TODO(slaweq): Remove this in the E cycle (C+2 as it will be next SLURP)
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def add_gw_port_info_to_logical_router_port(self):
|
||||
"""Add info if LRP is connecting internal subnet or ext gateway."""
|
||||
cmds = []
|
||||
@ -938,7 +994,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
txn.add(cmd)
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_router_default_route_empty_dst_ip(self):
|
||||
"""Check routers with default route with empty dst-ip (LP: #2002993).
|
||||
"""
|
||||
@ -965,7 +1024,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
# TODO(ralonsoh): Remove this in the Antelope+4 cycle
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def add_vnic_type_and_pb_capabilities_to_lsp(self):
|
||||
"""Add the port VNIC type and port binding capabilities to the LSP.
|
||||
|
||||
@ -997,7 +1059,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_fair_meter_consistency(self):
|
||||
"""Update the logging meter after neutron-server reload
|
||||
|
||||
@ -1060,7 +1125,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def update_nat_floating_ip_with_gateway_port_reference(self):
|
||||
"""Set NAT rule gateway_port column to any floating IP without
|
||||
router gateway port uuid reference - LP#2035281.
|
||||
@ -1107,7 +1175,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
# TODO(ralonsoh): Remove this method in the C+2 cycle (next SLURP release)
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def add_provider_resource_association_to_routers(self):
|
||||
"""Add the ``ProviderResourceAssociation`` register to all routers"""
|
||||
provider_name = 'ovn'
|
||||
@ -1126,7 +1197,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
# TODO(ralonsoh): Remove this method in the C+2 cycle (next SLURP release)
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def remove_invalid_gateway_chassis_from_unbound_lrp(self):
|
||||
"""Removes all invalid 'Gateway_Chassis' from unbound LRPs"""
|
||||
is_gw = ovn_const.OVN_ROUTER_IS_EXT_GW
|
||||
@ -1149,7 +1223,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
# TODO(ralonsoh): Remove this method in the E cycle (SLURP release)
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def set_network_type(self):
|
||||
"""Add the network type to the Logical_Switch registers"""
|
||||
context = n_context.get_admin_context()
|
||||
@ -1172,7 +1249,10 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
|
||||
|
||||
raise periodics.NeverAgain()
|
||||
|
||||
@has_lock_periodic(spacing=600, run_immediately=True)
|
||||
@has_lock_periodic(
|
||||
periodic_run_limit=ovn_const.MAINTENANCE_TASK_RETRY_LIMIT,
|
||||
spacing=ovn_const.MAINTENANCE_ONE_RUN_TASK_SPACING,
|
||||
run_immediately=True)
|
||||
def check_network_broadcast_arps_to_all_routers(self):
|
||||
"""Check the broadcast-arps-to-all-routers config
|
||||
|
||||
|
@ -32,12 +32,69 @@ from neutron.db.models import ovn as ovn_models
|
||||
from neutron.db import ovn_revision_numbers_db
|
||||
from neutron.objects import ports as ports_obj
|
||||
from neutron.plugins.ml2.drivers.ovn.mech_driver.ovsdb import maintenance
|
||||
from neutron.tests import base
|
||||
from neutron.tests.unit import fake_resources as fakes
|
||||
from neutron.tests.unit.plugins.ml2 import test_security_group as test_sg
|
||||
from neutron.tests.unit import testlib_api
|
||||
from neutron_lib import exceptions as n_exc
|
||||
|
||||
|
||||
class TestHasLockPeriodicDecorator(base.BaseTestCase):
|
||||
|
||||
def test_decorator_no_limit_have_lock(self):
|
||||
run_counter = 0
|
||||
|
||||
@maintenance.has_lock_periodic(
|
||||
periodic_run_limit=0, spacing=30)
|
||||
def test_maintenance_task(worker):
|
||||
nonlocal run_counter
|
||||
run_counter += 1
|
||||
|
||||
worker_mock = mock.MagicMock()
|
||||
worker_mock.has_lock = True
|
||||
|
||||
for _ in range(3):
|
||||
test_maintenance_task(worker_mock)
|
||||
self.assertEqual(3, run_counter)
|
||||
|
||||
def test_decorator_no_lock_no_limit(self):
|
||||
run_counter = 0
|
||||
|
||||
@maintenance.has_lock_periodic(
|
||||
periodic_run_limit=0, spacing=30)
|
||||
def test_maintenance_task(worker):
|
||||
nonlocal run_counter
|
||||
run_counter += 1
|
||||
|
||||
worker_mock = mock.MagicMock()
|
||||
has_lock_values = [False, False, True]
|
||||
|
||||
for has_lock in has_lock_values:
|
||||
worker_mock.has_lock = has_lock
|
||||
test_maintenance_task(worker_mock)
|
||||
self.assertEqual(1, run_counter)
|
||||
|
||||
def test_decorator_no_lock_with_limit(self):
|
||||
run_counter = 0
|
||||
|
||||
@maintenance.has_lock_periodic(
|
||||
periodic_run_limit=1, spacing=30)
|
||||
def test_maintenance_task(worker):
|
||||
nonlocal run_counter
|
||||
run_counter += 1
|
||||
|
||||
worker_mock = mock.MagicMock()
|
||||
|
||||
worker_mock.has_lock = False
|
||||
test_maintenance_task(worker_mock)
|
||||
self.assertEqual(0, run_counter)
|
||||
|
||||
worker_mock.has_lock = False
|
||||
self.assertRaises(periodics.NeverAgain,
|
||||
test_maintenance_task, worker_mock)
|
||||
self.assertEqual(0, run_counter)
|
||||
|
||||
|
||||
class TestSchemaAwarePeriodicsBase(testlib_api.SqlTestCaseLight):
|
||||
|
||||
def test__set_schema_aware_periodics(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user