Fix controller worker graceful shutdown
Fix an issue that prevents graceful shutdown of controller workers. The cotyledon.Service.terminate function is by definition the graceful termination function and doesn't have any 'graceful' optional boolean argument (https://cotyledon.readthedocs.io/en/latest/api.html). Because of this error, message_listener.wait() was never called in the consumers' termination functions, so flows could be interrupted before completion and could leave resources such as load balancers in a PENDING_* provisioning state. By default cotyledon.Service terminates the server after a timeout if the worker could not shut itself down gracefully. The default value for the timeout is 300 seconds (set in the devstack plugin) and can be overridden using the graceful_shutdown_timeout setting in octavia.conf. The default value will be updated to a lower value when the work on persistent taskflow is merged. Story: 2006603 Task: 36770 Conflicts: devstack/plugin.sh Change-Id: I3f776bd018246897c9a889699a2d0ecbbfbb7098
(cherry picked from commit 215c4c2284)
(cherry picked from commit 0fc1ea4789)
(cherry picked from commit 6fe5df6fd8)
This commit is contained in:
parent
6d3020d920
commit
1f61b1f090
|
@ -307,6 +307,10 @@ function octavia_configure {
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# set default graceful_shutdown_timeout to 300 sec (5 minutes)
|
||||||
|
# TODO(gthiemonge) update this value after persistent taskflow commits are
|
||||||
|
# merged
|
||||||
|
iniset $OCTAVIA_CONF DEFAULT graceful_shutdown_timeout 300
|
||||||
}
|
}
|
||||||
|
|
||||||
function create_mgmt_network_interface {
|
function create_mgmt_network_interface {
|
||||||
|
|
|
@ -26,3 +26,4 @@ Octavia Configuration Options
|
||||||
oslo.db
|
oslo.db
|
||||||
oslo.log
|
oslo.log
|
||||||
oslo.messaging
|
oslo.messaging
|
||||||
|
cotyledon
|
||||||
|
|
|
@ -16,6 +16,9 @@
|
||||||
# transport_url = rabbit://<user>:<pass>@server01,<user>:<pass>@server02/<vhost>
|
# transport_url = rabbit://<user>:<pass>@server01,<user>:<pass>@server02/<vhost>
|
||||||
# transport_url =
|
# transport_url =
|
||||||
|
|
||||||
|
# How long in seconds to wait for Octavia workers to exit before killing them.
|
||||||
|
# graceful_shutdown_timeout = 60
|
||||||
|
|
||||||
[api_settings]
|
[api_settings]
|
||||||
# bind_host = 127.0.0.1
|
# bind_host = 127.0.0.1
|
||||||
# bind_port = 9876
|
# bind_port = 9876
|
||||||
|
|
|
@ -46,14 +46,14 @@ class ConsumerService(cotyledon.Service):
|
||||||
)
|
)
|
||||||
self.message_listener.start()
|
self.message_listener.start()
|
||||||
|
|
||||||
def terminate(self, graceful=False):
|
def terminate(self):
|
||||||
if self.message_listener:
|
if self.message_listener:
|
||||||
LOG.info('Stopping consumer...')
|
LOG.info('Stopping consumer...')
|
||||||
self.message_listener.stop()
|
self.message_listener.stop()
|
||||||
if graceful:
|
|
||||||
LOG.info('Consumer successfully stopped. Waiting for final '
|
LOG.info('Consumer successfully stopped. Waiting for final '
|
||||||
'messages to be processed...')
|
'messages to be processed...')
|
||||||
self.message_listener.wait()
|
self.message_listener.wait()
|
||||||
if self.endpoints:
|
if self.endpoints:
|
||||||
LOG.info('Shutting down endpoint worker executors...')
|
LOG.info('Shutting down endpoint worker executors...')
|
||||||
for e in self.endpoints:
|
for e in self.endpoints:
|
||||||
|
|
|
@ -58,15 +58,4 @@ class TestConsumer(base.TestRpc):
|
||||||
cons.run()
|
cons.run()
|
||||||
cons.terminate()
|
cons.terminate()
|
||||||
mock_rpc_server_rv.stop.assert_called_once_with()
|
mock_rpc_server_rv.stop.assert_called_once_with()
|
||||||
self.assertFalse(mock_rpc_server_rv.wait.called)
|
|
||||||
|
|
||||||
@mock.patch.object(messaging, 'get_rpc_server')
|
|
||||||
def test_consumer_graceful_terminate(self, mock_rpc_server):
|
|
||||||
mock_rpc_server_rv = mock.Mock()
|
|
||||||
mock_rpc_server.return_value = mock_rpc_server_rv
|
|
||||||
|
|
||||||
cons = consumer.ConsumerService(1, self.conf)
|
|
||||||
cons.run()
|
|
||||||
cons.terminate(graceful=True)
|
|
||||||
mock_rpc_server_rv.stop.assert_called_once_with()
|
|
||||||
mock_rpc_server_rv.wait.assert_called_once_with()
|
mock_rpc_server_rv.wait.assert_called_once_with()
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fix a bug that could interrupt resource creation when performing a graceful
|
||||||
|
shutdown of the controller worker and leave resources in a
|
||||||
|
PENDING_CREATE/PENDING_UPDATE/PENDING_DELETE provisioning status. If the
|
||||||
|
duration of an Octavia flow is greater than the 'graceful_shutdown_timeout'
|
||||||
|
configuration value, stopping the Octavia worker can still interrupt the
|
||||||
|
creation of resources.
|
3
tox.ini
3
tox.ini
|
@ -103,7 +103,8 @@ commands =
|
||||||
--namespace oslo.db \
|
--namespace oslo.db \
|
||||||
--namespace oslo.log \
|
--namespace oslo.log \
|
||||||
--namespace oslo.messaging \
|
--namespace oslo.messaging \
|
||||||
--namespace keystonemiddleware.auth_token
|
--namespace keystonemiddleware.auth_token \
|
||||||
|
--namespace cotyledon
|
||||||
|
|
||||||
[testenv:genpolicy]
|
[testenv:genpolicy]
|
||||||
whitelist_externals = mkdir
|
whitelist_externals = mkdir
|
||||||
|
|
Loading…
Reference in New Issue