From 14720138309c67d3a6dcaeb6b7a784e21cd74ad2 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Tue, 10 Jun 2014 14:26:42 +0300 Subject: [PATCH] Slow down Kombu reconnect attempts For a rationale for this patch, see the discussion surrounding Bug #856764. When reconnecting to a RabbitMQ cluster with mirrored queues in use, the attempt to release the connection can hang "indefinitely" somewhere deep down in Kombu. Blocking the thread for a bit prior to release seems to kludge around the problem where it is otherwise reproducible. DocImpact Change-Id: Ic2ede3046709b831adf8204e4c909c589c1786c4 Partial-Bug: #856764 --- openstack/common/rpc/impl_kombu.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/openstack/common/rpc/impl_kombu.py b/openstack/common/rpc/impl_kombu.py index 32f2a85f4..f077c56e4 100644 --- a/openstack/common/rpc/impl_kombu.py +++ b/openstack/common/rpc/impl_kombu.py @@ -52,6 +52,10 @@ kombu_opts = [ default='', help=('SSL certification authority file ' '(valid only if SSL enabled)')), + cfg.FloatOpt('kombu_reconnect_delay', + default=1.0, + help='How long to wait before reconnecting in response to an ' + 'AMQP consumer cancel notification.'), cfg.StrOpt('rabbit_host', default='localhost', help='The RabbitMQ broker address where a single node is used'), @@ -498,6 +502,17 @@ class Connection(object): LOG.info(_LI("Reconnecting to AMQP server on " "%(hostname)s:%(port)d") % params) try: + # XXX(nic): when reconnecting to a RabbitMQ cluster + # with mirrored queues in use, the attempt to release the + # connection can hang "indefinitely" somewhere deep down + # in Kombu. Blocking the thread for a bit prior to + # release seems to kludge around the problem where it is + # otherwise reproduceable. + if self.conf.kombu_reconnect_delay > 0: + LOG.info(_("Delaying reconnect for %1.1f seconds...") % + self.conf.kombu_reconnect_delay) + time.sleep(self.conf.kombu_reconnect_delay) + self.connection.release() except self.connection_errors: pass