Catch all exceptions in PM's call to get_pool()
Unhandled exceptions in central during pool manager (PM) start can cause PM to fail. For example, if pool manager's call to central.get_pool() fails with, say, an OperationalError: (2013, 'Lost connection to MySQL server during query'), then PM will crash out. Change-Id: If076c451e8f9802a24f75d103dece8d1bf06af87 Closes-Bug: 1583982
This commit is contained in:
parent
d8c6c22f07
commit
f388734737
|
@ -154,6 +154,9 @@ class Service(service.RPCService, coordination.CoordinationMixin,
|
||||||
|
|
||||||
has_targets = False
|
has_targets = False
|
||||||
|
|
||||||
|
# TODO(kiall): This block of code should be replaced with a cleaner,
|
||||||
|
# limited version. e.g. should retry for X minutes, and
|
||||||
|
# backoff rather than fixed retry intervals.
|
||||||
while not has_targets:
|
while not has_targets:
|
||||||
try:
|
try:
|
||||||
self.pool = self.central_api.get_pool(context, pool_id)
|
self.pool = self.central_api.get_pool(context, pool_id)
|
||||||
|
@ -171,6 +174,12 @@ class Service(service.RPCService, coordination.CoordinationMixin,
|
||||||
# designate-central service may not have started yet
|
# designate-central service may not have started yet
|
||||||
except messaging.exceptions.MessagingTimeout:
|
except messaging.exceptions.MessagingTimeout:
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
# designate-central failed in an unknown way, don't allow another
|
||||||
|
# failing / not started service to cause pool-manager to crash.
|
||||||
|
except Exception:
|
||||||
|
LOG.exception(_LE("An unknown exception occurred while "
|
||||||
|
"fetching pool details"))
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
# Create the necessary Backend instances for each target
|
# Create the necessary Backend instances for each target
|
||||||
self._setup_target_backends()
|
self._setup_target_backends()
|
||||||
|
|
Loading…
Reference in New Issue