Merge "[OVN] Avoid unnecessary DB writes during agent liveness check"

This commit is contained in:
Zuul 2020-06-22 02:38:40 +00:00 committed by Gerrit Code Review
commit 8b7d4630b5
2 changed files with 45 additions and 19 deletions

View File

@ -1054,7 +1054,7 @@ class OVNMechanismDriver(api.MechanismDriver):
" networking-ovn-metadata-agent status/logs.",
port_id)
def agent_alive(self, chassis, type_):
def agent_alive(self, chassis, type_, update_db):
nb_cfg = chassis.nb_cfg
key = ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY
if type_ == ovn_const.OVN_METADATA_AGENT:
@ -1066,15 +1066,17 @@ class OVNMechanismDriver(api.MechanismDriver):
updated_at = timeutils.parse_isotime(chassis.external_ids[key])
except KeyError:
updated_at = timeutils.utcnow(with_timezone=True)
update_db = True
# Allow a maximum of 1 difference between expected and read values
# to avoid false positives.
if self._nb_ovn.nb_global.nb_cfg - nb_cfg <= 1:
# update the time of our successful check
value = timeutils.utcnow(with_timezone=True).isoformat()
self._sb_ovn.db_set('Chassis', chassis.uuid,
('external_ids', {key: value})).execute(
check_error=True)
if update_db:
# Update the time of our successful check
value = timeutils.utcnow(with_timezone=True).isoformat()
self._sb_ovn.db_set('Chassis', chassis.uuid,
('external_ids', {key: value})).execute(
check_error=True)
return True
now = timeutils.utcnow(with_timezone=True)
@ -1102,7 +1104,7 @@ class OVNMechanismDriver(api.MechanismDriver):
'alive': alive,
'admin_state_up': True}
def agents_from_chassis(self, chassis):
def agents_from_chassis(self, chassis, update_db=True):
agent_dict = {}
# Check for ovn-controller / ovn-controller gateway
@ -1113,7 +1115,7 @@ class OVNMechanismDriver(api.MechanismDriver):
chassis.external_ids.get('ovn-cms-options', [])):
agent_type = ovn_const.OVN_CONTROLLER_GW_AGENT
alive = self.agent_alive(chassis, agent_type)
alive = self.agent_alive(chassis, agent_type, update_db)
description = chassis.external_ids.get(
ovn_const.OVN_AGENT_DESC_KEY, '')
agent_dict[agent_id] = self._format_agent_info(
@ -1125,7 +1127,7 @@ class OVNMechanismDriver(api.MechanismDriver):
ovn_const.OVN_AGENT_METADATA_ID_KEY)
if metadata_agent_id:
agent_type = ovn_const.OVN_METADATA_AGENT
alive = self.agent_alive(chassis, agent_type)
alive = self.agent_alive(chassis, agent_type, update_db)
description = chassis.external_ids.get(
ovn_const.OVN_AGENT_METADATA_DESC_KEY, '')
agent_dict[metadata_agent_id] = self._format_agent_info(
@ -1159,7 +1161,11 @@ class OVNMechanismDriver(api.MechanismDriver):
setattr(self._plugin, method_name, types.MethodType(fn, self._plugin))
def ping_chassis(self):
"""Update NB_Global.nb_cfg so that Chassis.nb_cfg will increment"""
"""Update NB_Global.nb_cfg so that Chassis.nb_cfg will increment
:returns: (bool) True if nb_cfg was updated. False if it was updated
recently and this call didn't trigger any update.
"""
last_ping = self._nb_ovn.nb_global.external_ids.get(
ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY)
if last_ping:
@ -1167,11 +1173,12 @@ class OVNMechanismDriver(api.MechanismDriver):
next_ping = (timeutils.parse_isotime(last_ping) +
datetime.timedelta(seconds=interval))
if timeutils.utcnow(with_timezone=True) < next_ping:
return
return False
with self._nb_ovn.create_transaction(check_error=True,
bump_nb_cfg=True) as txn:
txn.add(self._nb_ovn.check_liveness())
return True
def populate_agents(driver):
@ -1182,12 +1189,12 @@ def populate_agents(driver):
def get_agents(self, context, filters=None, fields=None, _driver=None):
_driver.ping_chassis()
update_db = _driver.ping_chassis()
filters = filters or {}
agent_list = []
populate_agents(_driver)
for ch in AGENTS.values():
for agent in _driver.agents_from_chassis(ch).values():
for agent in _driver.agents_from_chassis(ch, update_db).values():
if all(agent[k] in v for k, v in filters.items()):
agent_list.append(agent)
return agent_list

View File

@ -1543,7 +1543,10 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase):
ovn_const.OVN_METADATA_AGENT):
self.mech_driver._nb_ovn.nb_global.nb_cfg = 5
chassis = self._add_chassis_agent(5, agent_type)
self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type))
self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type,
update_db=True))
# Assert that each Chassis has been updated in the SB database
self.assertEqual(2, self.sb_ovn.db_set.call_count)
def test_agent_alive_true_one_diff(self):
# Agent should be reported as alive when the nb_cfg delta is 1
@ -1554,14 +1557,16 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase):
now = timeutils.utcnow()
updated_at = now - datetime.timedelta(cfg.CONF.agent_down_time + 1)
chassis = self._add_chassis_agent(4, agent_type, updated_at)
self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type))
self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type,
update_db=True))
def test_agent_alive_not_timed_out(self):
for agent_type in (ovn_const.OVN_CONTROLLER_AGENT,
ovn_const.OVN_METADATA_AGENT):
self.mech_driver._nb_ovn.nb_global.nb_cfg = 5
chassis = self._add_chassis_agent(3, agent_type)
self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type),
self.assertTrue(self.mech_driver.agent_alive(
chassis, agent_type, update_db=True),
"Agent type %s is not alive" % agent_type)
def test_agent_alive_timed_out(self):
@ -1571,7 +1576,17 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase):
now = timeutils.utcnow()
updated_at = now - datetime.timedelta(cfg.CONF.agent_down_time + 1)
chassis = self._add_chassis_agent(3, agent_type, updated_at)
self.assertFalse(self.mech_driver.agent_alive(chassis, agent_type))
self.assertFalse(self.mech_driver.agent_alive(chassis, agent_type,
update_db=True))
def test_agent_alive_true_skip_db_update(self):
for agent_type in (ovn_const.OVN_CONTROLLER_AGENT,
ovn_const.OVN_METADATA_AGENT):
self.mech_driver._nb_ovn.nb_global.nb_cfg = 5
chassis = self._add_chassis_agent(5, agent_type)
self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type,
update_db=False))
self.sb_ovn.db_set.assert_not_called()
def _test__update_dnat_entry_if_needed(self, up=True):
ovn_conf.cfg.CONF.set_override(
@ -1656,10 +1671,12 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase):
self.nb_ovn.nb_global.external_ids = {
ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY: str(time)}
update_db = self.mech_driver.ping_chassis()
# Since the interval has expired, assert that the "check_liveness"
# command has been invoked
self.mech_driver.ping_chassis()
self.nb_ovn.check_liveness.assert_called_once_with()
# Assert that ping_chassis returned True as it updated the db
self.assertTrue(update_db)
def test_ping_chassis_interval_not_expired(self):
ovn_conf.cfg.CONF.set_override('agent_down_time', 10)
@ -1669,9 +1686,11 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase):
self.nb_ovn.nb_global.external_ids = {
ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY: str(time)}
update_db = self.mech_driver.ping_chassis()
# Assert that "check_liveness" wasn't invoked
self.mech_driver.ping_chassis()
self.assertFalse(self.nb_ovn.check_liveness.called)
# Assert that ping_chassis returned False as it didn't update the db
self.assertFalse(update_db)
class OVNMechanismDriverTestCase(test_plugin.Ml2PluginV2TestCase):