From 6fe5df6fd8b80e3022120f2bd74098a8e71152f7 Mon Sep 17 00:00:00 2001 From: Gregory Thiemonge Date: Mon, 23 Sep 2019 20:49:43 +0200 Subject: [PATCH] Fix controller worker graceful shutdown Fix an issue that prevents graceful shutdown of controller workers. The cotyledon.Service.terminate function is by definition the graceful termination function and doesn't have any 'graceful' optional boolean argument (https://cotyledon.readthedocs.io/en/latest/api.html). Because of this error, message_listener.wait() was never called in the consumers' termination functions, so flows could be interrupted before completion and could leave resources such as load balancers in a PENDING_* provisioning state. By default cotyledon.Service terminates the server after a timeout if the worker could not shut itself down gracefully. The default value for the timeout is 300 seconds (set in the devstack plugin) and can be overridden using the graceful_shutdown_timeout setting in octavia.conf. The default value will be updated to a lower value once the work on persistent taskflow is merged. 
Story: 2006603 Task: 36770 Change-Id: I3f776bd018246897c9a889699a2d0ecbbfbb7098 (cherry picked from commit 215c4c2284209ff46dc62f7cc04432cb2c9bf39e) (cherry picked from commit 0fc1ea4789eaf743e04db31d366b64a34172df15) --- devstack/plugin.sh | 5 +++++ doc/source/configuration/configref.rst | 1 + etc/octavia.conf | 3 +++ octavia/controller/queue/consumer.py | 10 +++++----- octavia/tests/unit/controller/queue/test_consumer.py | 11 ----------- ...fix-worker-graceful-shutdown-c44b6797637aa1b3.yaml | 9 +++++++++ tox.ini | 3 ++- 7 files changed, 25 insertions(+), 17 deletions(-) create mode 100644 releasenotes/notes/fix-worker-graceful-shutdown-c44b6797637aa1b3.yaml diff --git a/devstack/plugin.sh b/devstack/plugin.sh index ab029291f3..ed7a9cf041 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -332,6 +332,11 @@ function octavia_configure { iniset $OCTAVIA_CONF DEFAULT bind_port ${OCTAVIA_HA_PORT} iniset $OCTAVIA_CONF DEFAULT bind_host 0.0.0.0 fi + + # set default graceful_shutdown_timeout to 300 sec (5 minutes) + # TODO(gthiemonge) update this value after persistant taskflow commits are + # merged + iniset $OCTAVIA_CONF DEFAULT graceful_shutdown_timeout 300 } function create_mgmt_network_interface { diff --git a/doc/source/configuration/configref.rst b/doc/source/configuration/configref.rst index 079eb6156f..d39bf2e66e 100644 --- a/doc/source/configuration/configref.rst +++ b/doc/source/configuration/configref.rst @@ -26,3 +26,4 @@ Octavia Configuration Options oslo.db oslo.log oslo.messaging + cotyledon diff --git a/etc/octavia.conf b/etc/octavia.conf index 81015256b7..aafac0341c 100644 --- a/etc/octavia.conf +++ b/etc/octavia.conf @@ -16,6 +16,9 @@ # transport_url = rabbit://:@server01,:@server02/ # transport_url = +# How long in seconds to wait for octavia worker to exit before killing them. 
+# graceful_shutdown_timeout = 60 + [api_settings] # bind_host = 127.0.0.1 # bind_port = 9876 diff --git a/octavia/controller/queue/consumer.py b/octavia/controller/queue/consumer.py index 5cf8766dfa..033f10846b 100644 --- a/octavia/controller/queue/consumer.py +++ b/octavia/controller/queue/consumer.py @@ -46,14 +46,14 @@ class ConsumerService(cotyledon.Service): ) self.message_listener.start() - def terminate(self, graceful=False): + def terminate(self): if self.message_listener: LOG.info('Stopping consumer...') self.message_listener.stop() - if graceful: - LOG.info('Consumer successfully stopped. Waiting for final ' - 'messages to be processed...') - self.message_listener.wait() + + LOG.info('Consumer successfully stopped. Waiting for final ' + 'messages to be processed...') + self.message_listener.wait() if self.endpoints: LOG.info('Shutting down endpoint worker executors...') for e in self.endpoints: diff --git a/octavia/tests/unit/controller/queue/test_consumer.py b/octavia/tests/unit/controller/queue/test_consumer.py index 4e3865a587..16833a685c 100644 --- a/octavia/tests/unit/controller/queue/test_consumer.py +++ b/octavia/tests/unit/controller/queue/test_consumer.py @@ -58,15 +58,4 @@ class TestConsumer(base.TestRpc): cons.run() cons.terminate() mock_rpc_server_rv.stop.assert_called_once_with() - self.assertFalse(mock_rpc_server_rv.wait.called) - - @mock.patch.object(messaging, 'get_rpc_server') - def test_consumer_graceful_terminate(self, mock_rpc_server): - mock_rpc_server_rv = mock.Mock() - mock_rpc_server.return_value = mock_rpc_server_rv - - cons = consumer.ConsumerService(1, self.conf) - cons.run() - cons.terminate(graceful=True) - mock_rpc_server_rv.stop.assert_called_once_with() mock_rpc_server_rv.wait.assert_called_once_with() diff --git a/releasenotes/notes/fix-worker-graceful-shutdown-c44b6797637aa1b3.yaml b/releasenotes/notes/fix-worker-graceful-shutdown-c44b6797637aa1b3.yaml new file mode 100644 index 0000000000..b2856462e6 --- /dev/null +++ 
b/releasenotes/notes/fix-worker-graceful-shutdown-c44b6797637aa1b3.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fix a bug that could interrupt resource creation when performing a graceful + shutdown of the controller worker and leave resources in a + PENDING_CREATE/PENDING_UPDATE/PENDING_DELETE provisioning status. If the + duration of an Octavia flow is greater than the 'graceful_shutdown_timeout' + configuration value, stopping the Octavia worker can still interrupt the + creation of resources. diff --git a/tox.ini b/tox.ini index 652ac297a2..bf05fa8d59 100644 --- a/tox.ini +++ b/tox.ini @@ -114,7 +114,8 @@ commands = --namespace oslo.db \ --namespace oslo.log \ --namespace oslo.messaging \ - --namespace keystonemiddleware.auth_token + --namespace keystonemiddleware.auth_token \ + --namespace cotyledon [testenv:genpolicy] basepython = python3