Use oslo_service's SignalHandler for signals

When Neutron is killed with SIGTERM (e.g. via systemctl) while using
ML2/OVN, neutron workers do not exit and are instead eventually killed
with SIGKILL when the graceful timeout is reached (often around 1
minute).

This is happening due to the signal handlers for SIGTERM. There are
multiple issues.

1) oslo_service, ml2/ovn mech_driver, and ml2/ovo_rpc.py all call
   signal.signal(signal.SIGTERM, ...), overwriting each other's signal
   handlers.
2) SIGTERM is handled in the main thread, and running blocking code
   there causes AssertionErrors in eventlet which also prevents the
   process from exiting.
3) The ml2/ovn cleanup code doesn't cause the process to end, so it
   interrupts the killing of the process.

oslo_service has a singleton SignalHandler class that solves all of
these issues.

Closes-Bug: #2056366
Depends-On: https://review.opendev.org/c/openstack/oslo.service/+/911627
Change-Id: I730a12746bceaa744c658854e38439420efc4629
Signed-off-by: Terry Wilson <twilson@redhat.com>
This commit is contained in:
Terry Wilson 2024-03-06 20:13:58 +00:00
parent 5c187e8dab
commit a4e49b6b8f
2 changed files with 7 additions and 5 deletions

View File

@ -19,7 +19,6 @@ import datetime
import functools
import multiprocessing
import operator
import signal
import threading
import types
import uuid
@ -43,6 +42,7 @@ from oslo_concurrency import lockutils
from oslo_config import cfg
from oslo_db import exception as os_db_exc
from oslo_log import log
from oslo_service import service as oslo_service
from oslo_utils import timeutils
from ovsdbapp.backend.ovs_idl import idlutils
@ -313,8 +313,9 @@ class OVNMechanismDriver(api.MechanismDriver):
themselves to the hash ring.
"""
# Attempt to remove the node from the ring when the worker stops
sh = oslo_service.SignalHandler()
atexit.register(self._remove_node_from_hash_ring)
signal.signal(signal.SIGTERM, self._remove_node_from_hash_ring)
sh.add_handler("SIGTERM", self._remove_node_from_hash_ring)
admin_context = n_context.get_admin_context()
if not self._hash_ring_probe_event.is_set():

View File

@ -13,7 +13,6 @@
import atexit
import queue
import signal
import threading
import traceback
import weakref
@ -24,6 +23,7 @@ from neutron_lib.callbacks import resources
from neutron_lib import context as n_ctx
from neutron_lib.db import api as db_api
from oslo_log import log as logging
from oslo_service import service
from neutron.api.rpc.callbacks import events as rpc_events
from neutron.api.rpc.handlers import resources_rpc
@ -38,8 +38,9 @@ LOG = logging.getLogger(__name__)
def _setup_change_handlers_cleanup():
atexit.register(_ObjectChangeHandler.clean_up)
signal.signal(signal.SIGINT, _ObjectChangeHandler.clean_up)
signal.signal(signal.SIGTERM, _ObjectChangeHandler.clean_up)
sh = service.SignalHandler()
sh.add_handler("SIGINT", _ObjectChangeHandler.clean_up)
sh.add_handler("SIGTERM", _ObjectChangeHandler.clean_up)
class _ObjectChangeHandler(object):