Fix house keeping graceful shutdown

SIGTERM was not caught by house keeping service, threads were not
correctly terminated and spare amphora creation may have been killed
when restarting the service, leaving amphorae in BOOTING status.

Story: 2007008
Task: 37788

Change-Id: Ib3d6aa0f2fd550c3a8756c41e0159c5ae8127c7c
This commit is contained in:
Gregory Thiemonge 2019-12-11 17:09:10 +01:00
parent e7ebd3905a
commit df0ba06c1e
3 changed files with 29 additions and 20 deletions

View File

@ -17,7 +17,6 @@ import datetime
import signal
import sys
import threading
import time
from oslo_config import cfg
from oslo_log import log as logging
@ -104,28 +103,27 @@ def main():
timestamp = str(datetime.datetime.utcnow())
LOG.info("Starting house keeping at %s", timestamp)
threads = []
# Thread to perform spare amphora check
spare_amp_thread = threading.Thread(target=spare_amphora_check)
spare_amp_thread.daemon = True
spare_amp_thread.start()
threads.append(spare_amp_thread)
# Thread to perform db cleanup
db_cleanup_thread = threading.Thread(target=db_cleanup)
db_cleanup_thread.daemon = True
db_cleanup_thread.start()
threads.append(db_cleanup_thread)
# Thread to perform certificate rotation
cert_rotate_thread = threading.Thread(target=cert_rotation)
cert_rotate_thread.daemon = True
cert_rotate_thread.start()
threads.append(cert_rotate_thread)
signal.signal(signal.SIGHUP, _mutate_config)
# Try-Exception block should be at the end to gracefully exit threads
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
def process_cleanup(*args, **kwargs):
LOG.info("Attempting to gracefully terminate House-Keeping")
spare_amp_thread_event.set()
db_cleanup_thread_event.set()
@ -134,3 +132,12 @@ def main():
db_cleanup_thread.join()
cert_rotate_thread.join()
LOG.info("House-Keeping process terminated")
signal.signal(signal.SIGTERM, process_cleanup)
signal.signal(signal.SIGHUP, _mutate_config)
try:
for thread in threads:
thread.join()
except KeyboardInterrupt:
process_cleanup()

View File

@ -109,7 +109,6 @@ class TestHouseKeepingCMD(base.TestCase):
mock_CertRotation.assert_called_once_with()
self.assertEqual(1, cert_rotate_mock.rotate.call_count)
@mock.patch('time.sleep')
@mock.patch('octavia.cmd.house_keeping.cert_rotate_thread_event')
@mock.patch('octavia.cmd.house_keeping.db_cleanup_thread_event')
@mock.patch('octavia.cmd.house_keeping.spare_amp_thread_event')
@ -118,7 +117,7 @@ class TestHouseKeepingCMD(base.TestCase):
def test_main(self, mock_service, mock_thread,
spare_amp_thread_event_mock,
db_cleanup_thread_event_mock,
cert_rotate_thread_event_mock, sleep_time):
cert_rotate_thread_event_mock):
spare_amp_thread_mock = mock.MagicMock()
db_cleanup_thread_mock = mock.MagicMock()
@ -132,9 +131,7 @@ class TestHouseKeepingCMD(base.TestCase):
db_cleanup_thread_mock.daemon.return_value = True
cert_rotate_thread_mock.daemon.return_value = True
# mock the time.sleep() in the while loop
sleep_time.side_effect = [True, Exception('break')]
self.assertRaisesRegex(Exception, 'break', house_keeping.main)
house_keeping.main()
spare_amp_thread_mock.start.assert_called_once_with()
db_cleanup_thread_mock.start.assert_called_once_with()
@ -144,7 +141,6 @@ class TestHouseKeepingCMD(base.TestCase):
self.assertTrue(db_cleanup_thread_mock.daemon)
self.assertTrue(cert_rotate_thread_mock.daemon)
@mock.patch('time.sleep')
@mock.patch('octavia.cmd.house_keeping.cert_rotate_thread_event')
@mock.patch('octavia.cmd.house_keeping.db_cleanup_thread_event')
@mock.patch('octavia.cmd.house_keeping.spare_amp_thread_event')
@ -153,8 +149,7 @@ class TestHouseKeepingCMD(base.TestCase):
def test_main_keyboard_interrupt(self, mock_service, mock_thread,
spare_amp_thread_event_mock,
db_cleanup_thread_event_mock,
cert_rotate_thread_event_mock,
sleep_time):
cert_rotate_thread_event_mock):
spare_amp_thread_mock = mock.MagicMock()
db_cleanup_thread_mock = mock.MagicMock()
cert_rotate_thread_mock = mock.MagicMock()
@ -167,8 +162,10 @@ class TestHouseKeepingCMD(base.TestCase):
db_cleanup_thread_mock.daemon.return_value = True
cert_rotate_thread_mock.daemon.return_value = True
# mock the time.sleep() in the while loop
sleep_time.side_effect = [True, KeyboardInterrupt]
mock_join = mock.MagicMock()
mock_join.side_effect = [KeyboardInterrupt, None]
spare_amp_thread_mock.join = mock_join
house_keeping.main()
spare_amp_thread_event_mock.set.assert_called_once_with()
@ -184,8 +181,7 @@ class TestHouseKeepingCMD(base.TestCase):
self.assertTrue(spare_amp_thread_mock.daemon)
self.assertTrue(db_cleanup_thread_mock.daemon)
self.assertTrue(cert_rotate_thread_mock.daemon)
spare_amp_thread_mock.join.assert_called_once_with()
self.assertEqual(2, spare_amp_thread_mock.join.call_count)
db_cleanup_thread_mock.join.assert_called_once_with()
cert_rotate_thread_mock.join.assert_called_once_with()

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fix a bug that could interrupt resource creation when performing a graceful
shutdown of the house keeping service and leave resources such as amphorae
in a BOOTING status.