Adds more exception handling for ironic-conductor heartbeat
While the heartbeat thread of the ironic-conductor service is reporting its heartbeat, it is interrupted by any database exception other than 'DBConnectionError'. Add an 'except Exception' clause in _conductor_service_record_keepalive to catch all possible exceptions raised from the database, so that the heartbeat thread does not exit, and log the exception information. Once the database recovers from the failure, the heartbeat thread continues to report its heartbeat. Change-Id: I0dc3ada945275811ef7272d500823e0a57011e8f Closes-Bug: #1696296
This commit is contained in:
parent
5ea8d9f354
commit
56b8eae918
@ -393,6 +393,9 @@ class BaseConductorManager(object):
|
||||
except db_exception.DBConnectionError:
|
||||
LOG.warning('Conductor could not connect to database '
|
||||
'while heartbeating.')
|
||||
except Exception as e:
|
||||
LOG.exception('Error while heartbeating. Error: %(err)s',
|
||||
{'err': e})
|
||||
self._keepalive_evt.wait(CONF.conductor.heartbeat_interval)
|
||||
|
||||
def _mapped_to_this_conductor(self, node_uuid, driver):
|
||||
|
@ -320,6 +320,19 @@ class KeepAliveTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
self.service._conductor_service_record_keepalive()
|
||||
self.assertEqual(3, mock_touch.call_count)
|
||||
|
||||
def test__conductor_service_record_keepalive_failed_error(self):
    # touch_conductor raises a generic Exception on its second call; the
    # keepalive routine is still expected to reach a third call.
    self._start_service()
    # Zero interval so that event.wait() does not slow the test down.
    CONF.set_override('heartbeat_interval', 0, 'conductor')
    touch_results = [None, Exception(), None]
    # Three False answers, then True; presumably this lets the keepalive
    # routine run three rounds before it returns -- confirm against
    # _conductor_service_record_keepalive.
    stop_flags = [False, False, False, True]
    with mock.patch.object(self.dbapi, 'touch_conductor',
                           side_effect=touch_results) as touch_mock:
        with mock.patch.object(self.service._keepalive_evt, 'is_set',
                               side_effect=stop_flags):
            self.service._conductor_service_record_keepalive()
        self.assertEqual(3, touch_mock.call_count)
|
||||
|
||||
|
||||
class ManagerSpawnWorkerTestCase(tests_base.TestCase):
|
||||
def setUp(self):
|
||||
|
9
releasenotes/notes/bug-1696296-a972c8d879b98940.yaml
Normal file
9
releasenotes/notes/bug-1696296-a972c8d879b98940.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an issue where an ironic-conductor service was deemed dead because
|
||||
the service could not report its heartbeat due to the database connection
|
||||
experiencing an unexpected failure. Full tracebacks of these exceptions are
|
||||
now logged, and if the database connection recovers in a reasonable amount
|
||||
of time the service will still be available.
|
||||
See https://bugs.launchpad.net/ironic/+bug/1696296.
|
Loading…
Reference in New Issue
Block a user