[OVN] Hash Ring: Set nodes as offline upon exit
This patch implements the proposed solution from LP #2024205. Upon the Neutron service being killed, the deletion of the entries from the ovn_hash_ring table that match the server hostname could be triggered. When this happens on all controllers, the ovn_hash_ring table could be rendered empty, which would result in ML2/OVN not processing any OVSDB events. Instead of removing the nodes from the ovn_hash_ring table at exit, this patch changes the code to just mark them as offline instead. That way, the nodes will remain registered in the table and the heartbeat thread will set them as online again on the next beat. If the service is stopped properly there won't be any heartbeat anymore and the nodes will be seen as offline by the Hash Ring Manager (same as if they were deleted). For more info see LP #2024205. Closes-Bug: #2024205 Change-Id: I052841c87651773c4988fcf39f9f978094297704 Signed-off-by: Lucas Alvares Gomes <lucasagomes@gmail.com>
This commit is contained in:
parent
42494ad6ae
commit
f2e3ab3805
@ -58,12 +58,12 @@ class OVNMechanismDriver(mech_driver.OVNMechanismDriver):
|
||||
def ovn_client(self):
|
||||
return self._ovn_client
|
||||
|
||||
def _clean_hash_ring(self):
|
||||
"""Don't clean the hash ring.
|
||||
def _set_hash_ring_nodes_offline(self):
|
||||
"""Don't set hash ring nodes as offline.
|
||||
|
||||
If this method was not overridden, cleanup would be performed when
|
||||
calling the db sync and running neutron server would lose all the nodes
|
||||
from the ring.
|
||||
calling the db sync and running neutron server would mark all the
|
||||
nodes from the ring as offline.
|
||||
"""
|
||||
|
||||
# Since we are not using the ovn mechanism driver while syncing,
|
||||
|
@ -50,10 +50,12 @@ def remove_nodes_from_host(context, group_name):
|
||||
CONF.host, group_name)
|
||||
|
||||
|
||||
def _touch(context, **filter_args):
|
||||
def _touch(context, updated_at=None, **filter_args):
|
||||
if updated_at is None:
|
||||
updated_at = timeutils.utcnow()
|
||||
with db_api.CONTEXT_WRITER.using(context):
|
||||
context.session.query(ovn_models.OVNHashRing).filter_by(
|
||||
**filter_args).update({'updated_at': timeutils.utcnow()})
|
||||
**filter_args).update({'updated_at': updated_at})
|
||||
|
||||
|
||||
def touch_nodes_from_host(context, group_name):
|
||||
@ -92,3 +94,9 @@ def get_active_nodes(context, interval, group_name, from_host=False):
|
||||
def count_offline_nodes(context, interval, group_name):
|
||||
query = _get_nodes_query(context, interval, group_name, offline=True)
|
||||
return query.count()
|
||||
|
||||
|
||||
def set_nodes_from_host_as_offline(context, group_name):
|
||||
timestamp = datetime.datetime(day=26, month=10, year=1985, hour=9)
|
||||
_touch(context, updated_at=timestamp, hostname=CONF.host,
|
||||
group_name=group_name)
|
||||
|
@ -286,15 +286,17 @@ class OVNMechanismDriver(api.MechanismDriver):
|
||||
resources.SECURITY_GROUP_RULE,
|
||||
events.BEFORE_DELETE)
|
||||
|
||||
def _clean_hash_ring(self, *args, **kwargs):
|
||||
def _set_hash_ring_nodes_offline(self, *args, **kwargs):
|
||||
admin_context = n_context.get_admin_context()
|
||||
ovn_hash_ring_db.remove_nodes_from_host(admin_context,
|
||||
self.hash_ring_group)
|
||||
ovn_hash_ring_db.set_nodes_from_host_as_offline(
|
||||
admin_context, self.hash_ring_group)
|
||||
LOG.info('Hash Ring nodes from host "%s" marked as offline',
|
||||
cfg.CONF.host)
|
||||
|
||||
def pre_fork_initialize(self, resource, event, trigger, payload=None):
|
||||
"""Pre-initialize the ML2/OVN driver."""
|
||||
atexit.register(self._clean_hash_ring)
|
||||
signal.signal(signal.SIGTERM, self._clean_hash_ring)
|
||||
atexit.register(self._set_hash_ring_nodes_offline)
|
||||
signal.signal(signal.SIGTERM, self._set_hash_ring_nodes_offline)
|
||||
ovn_utils.create_neutron_pg_drop()
|
||||
|
||||
@staticmethod
|
||||
@ -314,7 +316,9 @@ class OVNMechanismDriver(api.MechanismDriver):
|
||||
"""
|
||||
admin_context = n_context.get_admin_context()
|
||||
if not self._hash_ring_probe_event.is_set():
|
||||
self._clean_hash_ring()
|
||||
# Clear existing entries
|
||||
ovn_hash_ring_db.remove_nodes_from_host(admin_context,
|
||||
self.hash_ring_group)
|
||||
self.node_uuid = ovn_hash_ring_db.add_node(admin_context,
|
||||
self.hash_ring_group)
|
||||
self._hash_ring_thread = maintenance.MaintenanceThread()
|
||||
|
@ -326,7 +326,8 @@ class TestOVNFunctionalBase(test_plugin.Ml2PluginV2TestCase,
|
||||
self.addCleanup(self.stop)
|
||||
# NOTE(ralonsoh): do not access to the DB at exit when the SQL
|
||||
# connection is already closed, to avoid useless exception messages.
|
||||
mock.patch.object(self.mech_driver, '_clean_hash_ring').start()
|
||||
mock.patch.object(
|
||||
self.mech_driver, '_set_hash_ring_nodes_offline').start()
|
||||
self.mech_driver.pre_fork_initialize(
|
||||
mock.ANY, mock.ANY, trigger_cls.trigger)
|
||||
|
||||
|
@ -269,3 +269,17 @@ class TestHashRing(testlib_api.SqlTestCaseLight):
|
||||
# Assert no nodes are considered offline
|
||||
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
|
||||
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||
|
||||
def test_set_nodes_from_host_as_offline(self):
|
||||
self._add_nodes_and_assert_exists(count=3)
|
||||
|
||||
active_nodes = ovn_hash_ring_db.get_active_nodes(
|
||||
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)
|
||||
self.assertEqual(3, len(active_nodes))
|
||||
|
||||
ovn_hash_ring_db.set_nodes_from_host_as_offline(
|
||||
self.admin_ctx, HASH_RING_TEST_GROUP)
|
||||
|
||||
active_nodes = ovn_hash_ring_db.get_active_nodes(
|
||||
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)
|
||||
self.assertEqual(0, len(active_nodes))
|
||||
|
Loading…
Reference in New Issue
Block a user