Fix handling the restart of ovn-controllers
The previous `getattr(old, 'nb_cfg', False)` would evaluate as false if the `old` row either did not contain a `nb_cfg` value or if the value was 0. As 0 is the value set on startup of the ovn-controller, this causes the neutron-api to ignore any event an ovn-controller sends directly after startup. In turn, this causes us to miss the information that the agent is synchronized, causing the agent to appear as down until something bumps the `nb_cfg` value globally. Closes-Bug: #1997982 Change-Id: Icec8fee93e64b871999f38674e305238e9705fd4
This commit is contained in:
parent
0384b3193b
commit
4cc611d319
@ -336,7 +336,7 @@ class ChassisAgentWriteEvent(ChassisAgentEvent):
|
|||||||
# don't update the AgentCache. We use chassis_private.chassis to return
|
# don't update the AgentCache. We use chassis_private.chassis to return
|
||||||
# data about the agent.
|
# data about the agent.
|
||||||
return event == self.ROW_CREATE or (
|
return event == self.ROW_CREATE or (
|
||||||
getattr(old, 'nb_cfg', False) and not
|
hasattr(old, 'nb_cfg') and not
|
||||||
(self.table == 'Chassis_Private' and not row.chassis))
|
(self.table == 'Chassis_Private' and not row.chassis))
|
||||||
|
|
||||||
def run(self, event, row, old):
|
def run(self, event, row, old):
|
||||||
|
@ -484,6 +484,66 @@ class TestAgentMonitor(base.TestOVNFunctionalBase):
|
|||||||
self.fail('Chassis timestamp: %s, agent updated_at: %s' %
|
self.fail('Chassis timestamp: %s, agent updated_at: %s' %
|
||||||
(chassis_ts, str(agent.updated_at)))
|
(chassis_ts, str(agent.updated_at)))
|
||||||
|
|
||||||
|
def test_agent_restart(self):
|
||||||
|
def check_agent_up():
|
||||||
|
agent = neutron_agent.AgentCache()[self.chassis_name]
|
||||||
|
return agent.alive
|
||||||
|
|
||||||
|
def check_agent_down():
|
||||||
|
return not check_agent_up()
|
||||||
|
|
||||||
|
def check_nb_cfg_timestamp_is_not_null():
|
||||||
|
agent = neutron_agent.AgentCache()[self.chassis_name]
|
||||||
|
return agent.updated_at != 0
|
||||||
|
|
||||||
|
if not self.sb_api.is_table_present('Chassis_Private'):
|
||||||
|
self.skipTest('Ovn sb not support Chassis_Private')
|
||||||
|
|
||||||
|
# Set nb_cfg to some realistic value, so that the alive check can
|
||||||
|
# actually work
|
||||||
|
self.nb_api.db_set(
|
||||||
|
'NB_Global', '.', ('nb_cfg', 1337)).execute(check_error=True)
|
||||||
|
self.sb_api.db_set(
|
||||||
|
'Chassis_Private', self.chassis_name, ('nb_cfg', 1337)
|
||||||
|
).execute(check_error=True)
|
||||||
|
|
||||||
|
chassis_uuid = self.sb_api.db_get(
|
||||||
|
'Chassis', self.chassis_name, 'uuid').execute(check_error=True)
|
||||||
|
|
||||||
|
self.assertTrue(check_agent_up())
|
||||||
|
n_utils.wait_until_true(check_nb_cfg_timestamp_is_not_null, timeout=5)
|
||||||
|
|
||||||
|
# Let's start by shutting down the ovn-controller
|
||||||
|
# (where it will remove the Chassis_Private table entry)
|
||||||
|
self.sb_api.db_destroy(
|
||||||
|
'Chassis_Private', self.chassis_name).execute(check_error=True)
|
||||||
|
try:
|
||||||
|
n_utils.wait_until_true(check_agent_down, timeout=5)
|
||||||
|
except n_utils.WaitTimeout:
|
||||||
|
self.fail('Agent did not go down after Chassis_Private removal')
|
||||||
|
|
||||||
|
# Now the ovn-controller starts up again and has not yet synced with
|
||||||
|
# the southbound database
|
||||||
|
self.sb_api.db_create(
|
||||||
|
'Chassis_Private', name=self.chassis_name,
|
||||||
|
external_ids={}, chassis=chassis_uuid,
|
||||||
|
nb_cfg_timestamp=0, nb_cfg=0
|
||||||
|
).execute(check_error=True)
|
||||||
|
self.assertTrue(check_agent_down())
|
||||||
|
|
||||||
|
# Now the ovn-controller has synced with the southbound database
|
||||||
|
nb_cfg_timestamp = timeutils.utcnow_ts() * 1000
|
||||||
|
with self.sb_api.transaction() as txn:
|
||||||
|
txn.add(self.sb_api.db_set('Chassis_Private', self.chassis_name,
|
||||||
|
('nb_cfg_timestamp', nb_cfg_timestamp)))
|
||||||
|
txn.add(self.sb_api.db_set('Chassis_Private', self.chassis_name,
|
||||||
|
('nb_cfg', 1337)))
|
||||||
|
try:
|
||||||
|
n_utils.wait_until_true(check_agent_up, timeout=5)
|
||||||
|
except n_utils.WaitTimeout:
|
||||||
|
self.fail('Agent did not go up after sync is done')
|
||||||
|
self.assertTrue(check_nb_cfg_timestamp_is_not_null())
|
||||||
|
|
||||||
|
|
||||||
class TestOvnIdlProbeInterval(base.TestOVNFunctionalBase):
|
class TestOvnIdlProbeInterval(base.TestOVNFunctionalBase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user