From ae69762511506782b88bdef61bcf4c9561dcf740 Mon Sep 17 00:00:00 2001 From: Gregory Thiemonge Date: Wed, 11 Dec 2019 17:09:10 +0100 Subject: [PATCH] Fix house keeping graceful shutdown SIGTERM was not caught by house keeping service, threads were not correctly terminated and spare amphora creation may have been killed when restarting the service, leaving amphorae in BOOTING status. Story: 2007008 Task: 37788 Change-Id: Ib3d6aa0f2fd550c3a8756c41e0159c5ae8127c7c (cherry picked from commit df0ba06c1e96dc5ad4855d24435970ee926175ca) --- octavia/cmd/house_keeping.py | 21 +++++++++++++------ octavia/tests/unit/cmd/test_house_keeping.py | 20 +++++++----------- ...use-keeping-shutdown-17b04417a2c4849f.yaml | 6 ++++++ 3 files changed, 29 insertions(+), 18 deletions(-) create mode 100644 releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml diff --git a/octavia/cmd/house_keeping.py b/octavia/cmd/house_keeping.py index 588180a8f1..4d98a7cbf3 100644 --- a/octavia/cmd/house_keeping.py +++ b/octavia/cmd/house_keeping.py @@ -14,9 +14,9 @@ # import datetime +import signal import sys import threading -import time from oslo_config import cfg from oslo_log import log as logging @@ -98,26 +98,27 @@ def main(): timestamp = str(datetime.datetime.utcnow()) LOG.info("Starting house keeping at %s", timestamp) + threads = [] + # Thread to perform spare amphora check spare_amp_thread = threading.Thread(target=spare_amphora_check) spare_amp_thread.daemon = True spare_amp_thread.start() + threads.append(spare_amp_thread) # Thread to perform db cleanup db_cleanup_thread = threading.Thread(target=db_cleanup) db_cleanup_thread.daemon = True db_cleanup_thread.start() + threads.append(db_cleanup_thread) # Thread to perform certificate rotation cert_rotate_thread = threading.Thread(target=cert_rotation) cert_rotate_thread.daemon = True cert_rotate_thread.start() + threads.append(cert_rotate_thread) - # Try-Exception block should be at the end to gracefully exit threads - try: - while True: - time.sleep(1) - except KeyboardInterrupt: + def process_cleanup(*args, **kwargs): LOG.info("Attempting to gracefully terminate House-Keeping") spare_amp_thread_event.set() db_cleanup_thread_event.set() @@ -126,3 +127,11 @@ def main(): db_cleanup_thread.join() cert_rotate_thread.join() LOG.info("House-Keeping process terminated") + + signal.signal(signal.SIGTERM, process_cleanup) + + try: + for thread in threads: + thread.join() + except KeyboardInterrupt: + process_cleanup() diff --git a/octavia/tests/unit/cmd/test_house_keeping.py b/octavia/tests/unit/cmd/test_house_keeping.py index 701c3b56ff..2491d109f2 100644 --- a/octavia/tests/unit/cmd/test_house_keeping.py +++ b/octavia/tests/unit/cmd/test_house_keeping.py @@ -109,7 +109,6 @@ class TestHouseKeepingCMD(base.TestCase): mock_CertRotation.assert_called_once_with() self.assertEqual(1, cert_rotate_mock.rotate.call_count) - @mock.patch('time.sleep') @mock.patch('octavia.cmd.house_keeping.cert_rotate_thread_event') @mock.patch('octavia.cmd.house_keeping.db_cleanup_thread_event') @mock.patch('octavia.cmd.house_keeping.spare_amp_thread_event') @@ -118,7 +117,7 @@ class TestHouseKeepingCMD(base.TestCase): def test_main(self, mock_service, mock_thread, spare_amp_thread_event_mock, db_cleanup_thread_event_mock, - cert_rotate_thread_event_mock, sleep_time): + cert_rotate_thread_event_mock): spare_amp_thread_mock = mock.MagicMock() db_cleanup_thread_mock = mock.MagicMock() @@ -132,9 +131,7 @@ class TestHouseKeepingCMD(base.TestCase): db_cleanup_thread_mock.daemon.return_value = True cert_rotate_thread_mock.daemon.return_value = True - # mock the time.sleep() in the while loop - sleep_time.side_effect = [True, Exception('break')] - self.assertRaisesRegex(Exception, 'break', house_keeping.main) + house_keeping.main() spare_amp_thread_mock.start.assert_called_once_with() db_cleanup_thread_mock.start.assert_called_once_with() @@ -144,7 +141,6 @@ class TestHouseKeepingCMD(base.TestCase): self.assertTrue(db_cleanup_thread_mock.daemon) self.assertTrue(cert_rotate_thread_mock.daemon) - @mock.patch('time.sleep') @mock.patch('octavia.cmd.house_keeping.cert_rotate_thread_event') @mock.patch('octavia.cmd.house_keeping.db_cleanup_thread_event') @mock.patch('octavia.cmd.house_keeping.spare_amp_thread_event') @@ -153,8 +149,7 @@ class TestHouseKeepingCMD(base.TestCase): def test_main_keyboard_interrupt(self, mock_service, mock_thread, spare_amp_thread_event_mock, db_cleanup_thread_event_mock, - cert_rotate_thread_event_mock, - sleep_time): + cert_rotate_thread_event_mock): spare_amp_thread_mock = mock.MagicMock() db_cleanup_thread_mock = mock.MagicMock() cert_rotate_thread_mock = mock.MagicMock() @@ -167,8 +162,10 @@ class TestHouseKeepingCMD(base.TestCase): db_cleanup_thread_mock.daemon.return_value = True cert_rotate_thread_mock.daemon.return_value = True - # mock the time.sleep() in the while loop - sleep_time.side_effect = [True, KeyboardInterrupt] + mock_join = mock.MagicMock() + mock_join.side_effect = [KeyboardInterrupt, None] + spare_amp_thread_mock.join = mock_join + house_keeping.main() spare_amp_thread_event_mock.set.assert_called_once_with() @@ -184,7 +181,6 @@ class TestHouseKeepingCMD(base.TestCase): self.assertTrue(spare_amp_thread_mock.daemon) self.assertTrue(db_cleanup_thread_mock.daemon) self.assertTrue(cert_rotate_thread_mock.daemon) - - spare_amp_thread_mock.join.assert_called_once_with() + self.assertEqual(2, spare_amp_thread_mock.join.call_count) db_cleanup_thread_mock.join.assert_called_once_with() cert_rotate_thread_mock.join.assert_called_once_with() diff --git a/releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml b/releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml new file mode 100644 index 0000000000..0fd7aef9e6 --- /dev/null +++ b/releasenotes/notes/fix-house-keeping-shutdown-17b04417a2c4849f.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fix a bug that could interrupt resource creation when performing a graceful + shutdown of the house keeping service and leave resources such as amphorae + in a BOOTING status.