Report the last caught exception when a component fails its liveness probe.

HealthHandler.set_liveness() accepts an optional `exc` and stores it; the
Retry handler passes the exception it caught when it marks its handler as
not alive; HealthServer.liveness_status() logs the stored exception (when
there is one) for a component that is not alive.

NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. The hunk line counts match the @@
headers, but the indentation was reconstructed from Python nesting, so
verify it against the target tree before applying.

diff --git a/kuryr_kubernetes/controller/managers/health.py b/kuryr_kubernetes/controller/managers/health.py
index 1d9fc0d39..e8cb79ea3 100644
--- a/kuryr_kubernetes/controller/managers/health.py
+++ b/kuryr_kubernetes/controller/managers/health.py
@@ -95,8 +95,14 @@ class HealthServer(base_server.BaseHealthServer):
     def liveness_status(self):
         for component in self._registry:
             if not component.is_alive():
-                msg = 'Component %s is dead.' % component.__class__.__name__
-                LOG.error(msg)
+                exc = component.get_last_exception()
+                if not exc:
+                    msg = f'Component {component.__class__.__name__} is dead.'
+                    LOG.error(msg)
+                else:
+                    msg = (f'Component {component.__class__.__name__} is dead.'
+                           f' Last caught exception below')
+                    LOG.exception(msg, exc_info=exc)
                 return msg, httplib.INTERNAL_SERVER_ERROR, {}
         return 'ok', httplib.OK, {}
 
diff --git a/kuryr_kubernetes/handlers/health.py b/kuryr_kubernetes/handlers/health.py
index 175210950..2fe30cbd9 100644
--- a/kuryr_kubernetes/handlers/health.py
+++ b/kuryr_kubernetes/handlers/health.py
@@ -37,8 +37,11 @@ class HealthHandler(object):
         self._ready = True
         self._manager = HealthRegister.get_instance()
         self._manager.register(self)
+        self._last_exception = None
 
-    def set_liveness(self, alive):
+    def set_liveness(self, alive, exc=None):
+        if exc:
+            self._last_exception = exc
         self._alive = alive
 
     def set_readiness(self, ready):
@@ -49,3 +52,6 @@ class HealthHandler(object):
 
     def is_ready(self, *args):
         return self._ready
+
+    def get_last_exception(self):
+        return self._last_exception
diff --git a/kuryr_kubernetes/handlers/retry.py b/kuryr_kubernetes/handlers/retry.py
index 6421cca80..1116137ad 100644
--- a/kuryr_kubernetes/handlers/retry.py
+++ b/kuryr_kubernetes/handlers/retry.py
@@ -96,10 +96,10 @@ class Retry(base.EventHandler):
                         ex.reraise = False
                     else:
                         LOG.debug('Report handler unhealthy %s', self._handler)
-                        self._handler.set_liveness(alive=False)
-            except Exception:
+                        self._handler.set_liveness(alive=False, exc=ex.value)
+            except Exception as ex:
                 LOG.exception('Report handler unhealthy %s', self._handler)
-                self._handler.set_liveness(alive=False)
+                self._handler.set_liveness(alive=False, exc=ex)
                 raise
 
     def _sleep(self, deadline, attempt, exception):