Never raise an exception in notify()

notify() is called from python-ovs code which is not built to
recover from an exception in this user-overridden code. If there
is an exception (e.g. the DB server is down when we process
the hash ring), this exception can cause an unrecoverable error
in processing OVSDB messages, rendering the neutron worker useless.

Change-Id: I5f703d82175d71a222c76df37a82b5ccad890d14
(cherry picked from commit 67e616b2380d6549308a15077b2043721dbea5d0)
This commit is contained in:
Terry Wilson 2023-01-26 08:37:24 -06:00 committed by yatin
parent 2617f7b93c
commit 931f0af2e3

View File

@ -720,39 +720,43 @@ class OvnIdlDistributedLock(BaseOvnIdl):
self.driver.agent_chassis_table = 'Chassis_Private' self.driver.agent_chassis_table = 'Chassis_Private'
def notify(self, event, row, updates=None): def notify(self, event, row, updates=None):
self.handle_db_schema_changes(event, row)
self.notify_handler.notify(event, row, updates, global_=True)
try: try:
target_node = self._hash_ring.get_node(str(row.uuid)) self.handle_db_schema_changes(event, row)
except exceptions.HashRingIsEmpty as e: self.notify_handler.notify(event, row, updates, global_=True)
LOG.error('HashRing is empty, error: %s', e)
return
if target_node != self._node_uuid:
return
# If the worker hasn't been health checked by the maintenance
# thread (see bug #1834498), indicate that it's alive here
time_now = timeutils.utcnow()
touch_timeout = time_now - datetime.timedelta(
seconds=ovn_const.HASH_RING_TOUCH_INTERVAL)
if not self._last_touch or touch_timeout >= self._last_touch:
# NOTE(lucasagomes): Guard the db operation with an exception
# handler. If heartbeating fails for whatever reason, log
# the error and continue with processing the event
try: try:
ctx = neutron_context.get_admin_context() target_node = self._hash_ring.get_node(str(row.uuid))
ovn_hash_ring_db.touch_node(ctx, self._node_uuid) except exceptions.HashRingIsEmpty as e:
self._last_touch = time_now LOG.error('HashRing is empty, error: %s', e)
except Exception: return
LOG.exception('Hash Ring node %s failed to heartbeat', if target_node != self._node_uuid:
self._node_uuid) return
LOG.debug('Hash Ring: Node %(node)s (host: %(hostname)s) ' # If the worker hasn't been health checked by the maintenance
'handling event "%(event)s" for row %(row)s ' # thread (see bug #1834498), indicate that it's alive here
'(table: %(table)s)', time_now = timeutils.utcnow()
{'node': self._node_uuid, 'hostname': CONF.host, touch_timeout = time_now - datetime.timedelta(
'event': event, 'row': row.uuid, 'table': row._table.name}) seconds=ovn_const.HASH_RING_TOUCH_INTERVAL)
self.notify_handler.notify(event, row, updates) if not self._last_touch or touch_timeout >= self._last_touch:
# NOTE(lucasagomes): Guard the db operation with an exception
# handler. If heartbeating fails for whatever reason, log
# the error and continue with processing the event
try:
ctx = neutron_context.get_admin_context()
ovn_hash_ring_db.touch_node(ctx, self._node_uuid)
self._last_touch = time_now
except Exception:
LOG.exception('Hash Ring node %s failed to heartbeat',
self._node_uuid)
LOG.debug('Hash Ring: Node %(node)s (host: %(hostname)s) '
'handling event "%(event)s" for row %(row)s '
'(table: %(table)s)',
{'node': self._node_uuid, 'hostname': CONF.host,
'event': event, 'row': row.uuid,
'table': row._table.name})
self.notify_handler.notify(event, row, updates)
except Exception as e:
LOG.exception(e)
@abc.abstractmethod @abc.abstractmethod
def post_connect(self): def post_connect(self):