Merge "Adds more exception handling for ironic-conductor heartbeat"

This commit is contained in:
Jenkins 2017-09-19 07:53:22 +00:00 committed by Gerrit Code Review
commit 95cc0b110b
3 changed files with 25 additions and 0 deletions

View File

@@ -393,6 +393,9 @@ class BaseConductorManager(object):
except db_exception.DBConnectionError: except db_exception.DBConnectionError:
LOG.warning('Conductor could not connect to database ' LOG.warning('Conductor could not connect to database '
'while heartbeating.') 'while heartbeating.')
except Exception as e:
LOG.exception('Error while heartbeating. Error: %(err)s',
{'err': e})
self._keepalive_evt.wait(CONF.conductor.heartbeat_interval) self._keepalive_evt.wait(CONF.conductor.heartbeat_interval)
def _mapped_to_this_conductor(self, node_uuid, driver): def _mapped_to_this_conductor(self, node_uuid, driver):

View File

@@ -320,6 +320,19 @@ class KeepAliveTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.service._conductor_service_record_keepalive() self.service._conductor_service_record_keepalive()
self.assertEqual(3, mock_touch.call_count) self.assertEqual(3, mock_touch.call_count)
def test__conductor_service_record_keepalive_failed_error(self):
    """Heartbeating carries on after touch_conductor raises an Exception.

    ``touch_conductor`` is mocked to succeed, raise a generic
    ``Exception``, then succeed again, while ``is_set`` on the keepalive
    event is mocked to return False three times and then True. The test
    passes when ``touch_conductor`` has been called three times, i.e. the
    failing second call did not stop the later one.
    """
    self._start_service()
    # A zero interval means event.wait() does not slow the test down.
    CONF.set_override('heartbeat_interval', 0, 'conductor')

    touch_patcher = mock.patch.object(self.dbapi, 'touch_conductor')
    with touch_patcher as mock_touch:
        # First and third heartbeats succeed; the second one blows up.
        mock_touch.side_effect = [None, Exception(), None]

        is_set_patcher = mock.patch.object(
            self.service._keepalive_evt, 'is_set')
        with is_set_patcher as mock_is_set:
            # Report "not set" three times, then "set".
            mock_is_set.side_effect = [False, False, False, True]
            self.service._conductor_service_record_keepalive()

        self.assertEqual(3, mock_touch.call_count)
class ManagerSpawnWorkerTestCase(tests_base.TestCase): class ManagerSpawnWorkerTestCase(tests_base.TestCase):
def setUp(self): def setUp(self):

View File

@@ -0,0 +1,9 @@
---
fixes:
- |
Fixes an issue where an ironic-conductor service was deemed dead because
the service could not report its heartbeat due to the database connection
experiencing an unexpected failure. Full tracebacks of these exceptions are
now logged, and if the database connection recovers in a reasonable amount
of time the service will still be available.
See https://bugs.launchpad.net/ironic/+bug/1696296.