diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index c95a6e8eda..34319babf3 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -393,6 +393,9 @@ class BaseConductorManager(object): except db_exception.DBConnectionError: LOG.warning('Conductor could not connect to database ' 'while heartbeating.') + except Exception as e: + LOG.exception('Error while heartbeating. Error: %(err)s', + {'err': e}) self._keepalive_evt.wait(CONF.conductor.heartbeat_interval) def _mapped_to_this_conductor(self, node_uuid, driver): diff --git a/ironic/tests/unit/conductor/test_base_manager.py b/ironic/tests/unit/conductor/test_base_manager.py index 060da59d1b..ea2ff86aa3 100644 --- a/ironic/tests/unit/conductor/test_base_manager.py +++ b/ironic/tests/unit/conductor/test_base_manager.py @@ -320,6 +320,19 @@ class KeepAliveTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.service._conductor_service_record_keepalive() self.assertEqual(3, mock_touch.call_count) + def test__conductor_service_record_keepalive_failed_error(self): + self._start_service() + # avoid wasting time at the event.wait() + CONF.set_override('heartbeat_interval', 0, 'conductor') + with mock.patch.object(self.dbapi, 'touch_conductor') as mock_touch: + mock_touch.side_effect = [None, Exception(), + None] + with mock.patch.object(self.service._keepalive_evt, + 'is_set') as mock_is_set: + mock_is_set.side_effect = [False, False, False, True] + self.service._conductor_service_record_keepalive() + self.assertEqual(3, mock_touch.call_count) + class ManagerSpawnWorkerTestCase(tests_base.TestCase): def setUp(self): diff --git a/releasenotes/notes/bug-1696296-a972c8d879b98940.yaml b/releasenotes/notes/bug-1696296-a972c8d879b98940.yaml new file mode 100644 index 0000000000..eb9f19212d --- /dev/null +++ b/releasenotes/notes/bug-1696296-a972c8d879b98940.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes an issue where an ironic-conductor service was deemed dead because + the service could not report its heartbeat due to the database connection + experiencing an unexpected failure. Full tracebacks of these exceptions are + now logged, and if the database connection recovers in a reasonable amount + of time the service will still be available. + See https://bugs.launchpad.net/ironic/+bug/1696296. \ No newline at end of file