Use oslo_service's SignalHandler for signals

When Neutron is killed with SIGTERM (e.g. via systemctl) while using
ML2/OVN, the neutron workers do not exit and are instead eventually
killed with SIGKILL when the graceful timeout is reached (often around
1 minute).

This is happening due to the signal handlers for SIGTERM. There are
multiple issues.

1) oslo_service, ml2/ovn mech_driver, and ml2/ovo_rpc.py all call
   signal.signal(signal.SIGTERM, ...), overwriting each other's signal
   handlers.
2) SIGTERM is handled in the main thread, and running blocking code
   there causes AssertionErrors in eventlet, which also prevents the
   process from exiting.
3) The ml2/ovn cleanup code doesn't cause the process to end, so it
   interrupts the killing of the process.

oslo_service has a singleton SignalHandler class that solves all of
these issues.

Closes-Bug: #2056366
Depends-On: https://review.opendev.org/c/openstack/oslo.service/+/913512
Change-Id: I730a12746bceaa744c658854e38439420efc4629
Signed-off-by: Terry Wilson <twilson@redhat.com>
(cherry picked from commit a4e49b6b8fcf9acfa4e84c65de19ffd56b9022e7)
This commit is contained in:
Terry Wilson 2024-03-06 20:13:58 +00:00
parent c6d4a3e364
commit 2a09a4b802
3 changed files with 8 additions and 6 deletions
neutron/plugins/ml2
drivers/ovn/mech_driver
ovo_rpc.py
requirements.txt

@ -19,7 +19,6 @@ import datetime
import functools
import multiprocessing
import operator
import signal
import threading
import types
import uuid
@ -43,6 +42,7 @@ from oslo_concurrency import lockutils
from oslo_config import cfg
from oslo_db import exception as os_db_exc
from oslo_log import log
from oslo_service import service as oslo_service
from oslo_utils import timeutils
from ovsdbapp.backend.ovs_idl import idlutils
@ -313,8 +313,9 @@ class OVNMechanismDriver(api.MechanismDriver):
themselves to the hash ring.
"""
# Attempt to remove the node from the ring when the worker stops
sh = oslo_service.SignalHandler()
atexit.register(self._remove_node_from_hash_ring)
signal.signal(signal.SIGTERM, self._remove_node_from_hash_ring)
sh.add_handler("SIGTERM", self._remove_node_from_hash_ring)
admin_context = n_context.get_admin_context()
if not self._hash_ring_probe_event.is_set():

@ -13,7 +13,6 @@
import atexit
import queue
import signal
import threading
import traceback
import weakref
@ -24,6 +23,7 @@ from neutron_lib.callbacks import resources
from neutron_lib import context as n_ctx
from neutron_lib.db import api as db_api
from oslo_log import log as logging
from oslo_service import service
from neutron.api.rpc.callbacks import events as rpc_events
from neutron.api.rpc.handlers import resources_rpc
@ -38,8 +38,9 @@ LOG = logging.getLogger(__name__)
def _setup_change_handlers_cleanup():
atexit.register(_ObjectChangeHandler.clean_up)
signal.signal(signal.SIGINT, _ObjectChangeHandler.clean_up)
signal.signal(signal.SIGTERM, _ObjectChangeHandler.clean_up)
sh = service.SignalHandler()
sh.add_handler("SIGINT", _ObjectChangeHandler.clean_up)
sh.add_handler("SIGTERM", _ObjectChangeHandler.clean_up)
class _ObjectChangeHandler(object):

@ -38,7 +38,7 @@ oslo.privsep>=2.3.0 # Apache-2.0
oslo.reports>=1.18.0 # Apache-2.0
oslo.rootwrap>=5.15.0 # Apache-2.0
oslo.serialization>=2.25.0 # Apache-2.0
oslo.service>=2.8.0 # Apache-2.0
oslo.service>=3.4.1 # Apache-2.0
oslo.upgradecheck>=1.3.0 # Apache-2.0
oslo.utils>=7.0.0 # Apache-2.0
oslo.versionedobjects>=1.35.1 # Apache-2.0