From f38873473719f8ca14710cdf9977ea7995d5df8b Mon Sep 17 00:00:00 2001 From: Kiall Mac Innes Date: Fri, 20 May 2016 10:11:18 +0100 Subject: [PATCH] Catch all exceptions in PM's call to get_pool() Unhandled exceptions in central, during PM start, can cause PM to fail. For example, if the pool manager's call to central.get_pool() fails with, say, an OperationalError: (2013, 'Lost connection to MySQL server during query'), then PM will crash out. Change-Id: If076c451e8f9802a24f75d103dece8d1bf06af87 Closes-Bug: 1583982 --- designate/pool_manager/service.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/designate/pool_manager/service.py b/designate/pool_manager/service.py index 4f77189ed..297de39eb 100644 --- a/designate/pool_manager/service.py +++ b/designate/pool_manager/service.py @@ -154,6 +154,9 @@ class Service(service.RPCService, coordination.CoordinationMixin, has_targets = False + # TODO(kiall): This block of code should be replaced with a cleaner, + # limited version. e.g. should retry for X minutes, and + # backoff rather than fixed retry intervals. while not has_targets: try: self.pool = self.central_api.get_pool(context, pool_id) @@ -171,6 +174,12 @@ class Service(service.RPCService, coordination.CoordinationMixin, # designate-central service may not have started yet except messaging.exceptions.MessagingTimeout: time.sleep(0.2) + # designate-central failed in an unknown way, don't allow another + # failing / not started service to cause pool-manager to crash. + except Exception: + LOG.exception(_LE("An unknown exception occurred while " + "fetching pool details")) + time.sleep(5) # Create the necessary Backend instances for each target self._setup_target_backends()