diff --git a/nova/servicegroup/drivers/db.py b/nova/servicegroup/drivers/db.py index 28471d86f137..99bad8522ed0 100644 --- a/nova/servicegroup/drivers/db.py +++ b/nova/servicegroup/drivers/db.py @@ -14,20 +14,18 @@ # limitations under the License. from oslo_config import cfg -from oslo_db import exception as db_exception from oslo_log import log as logging import oslo_messaging as messaging from oslo_utils import timeutils import six -from nova.i18n import _, _LI, _LW +from nova.i18n import _, _LI, _LW, _LE from nova.servicegroup import api from nova.servicegroup.drivers import base CONF = cfg.CONF CONF.import_opt('service_down_time', 'nova.service') -CONF.import_opt('use_local', 'nova.conductor.api', group='conductor') LOG = logging.getLogger(__name__) @@ -85,13 +83,6 @@ class DbDriver(base.Driver): def _report_state(self, service): """Update the state of this service in the datastore.""" - if CONF.conductor.use_local: - # need to catch DB type errors - exc_cls = db_exception.DBError # oslo.db exception base class - else: - # need to catch messaging timeouts - exc_cls = messaging.MessagingTimeout - try: service.service_ref.report_count += 1 service.service_ref.save() @@ -100,12 +91,20 @@ class DbDriver(base.Driver): if getattr(service, 'model_disconnected', False): service.model_disconnected = False LOG.info( - _LI('Recovered connection to nova-conductor ' - 'for reporting service status.')) - - # the type of failure depends on use of remote or local conductor - except exc_cls: + _LI('Recovered from being unable to report status.')) + except messaging.MessagingTimeout: + # NOTE(johngarbutt) during upgrade we will see messaging timeouts + # as nova-conductor is restarted, so only log this error once. if not getattr(service, 'model_disconnected', False): service.model_disconnected = True LOG.warn(_LW('Lost connection to nova-conductor ' 'for reporting service status.')) + except Exception: + # NOTE(rpodolyaka): we'd like to avoid catching of all possible + # exceptions here, but otherwise it would become possible for + # the state reporting thread to stop abruptly, and thus leave + # the service unusable until it's restarted. + LOG.exception( + _LE('Unexpected error while reporting service status')) + # trigger the recovery log message, if this error goes away + service.model_disconnected = True diff --git a/nova/tests/unit/servicegroup/test_db_servicegroup.py b/nova/tests/unit/servicegroup/test_db_servicegroup.py index b53684d4af88..05f5b25f4dee 100644 --- a/nova/tests/unit/servicegroup/test_db_servicegroup.py +++ b/nova/tests/unit/servicegroup/test_db_servicegroup.py @@ -86,6 +86,7 @@ class DBServiceGroupTestCase(test.NoDBTestCase): fn(service) upd_mock.assert_called_once_with() self.assertEqual(11, service_ref.report_count) + self.assertFalse(service.model_disconnected) @mock.patch.object(objects.Service, 'save') def _test_report_state_error(self, exc_cls, upd_mock): @@ -96,12 +97,23 @@ class DBServiceGroupTestCase(test.NoDBTestCase): service_ref=service_ref) fn = self.servicegroup_api._driver._report_state fn(service) # fail if exception not caught + self.assertTrue(service.model_disconnected) def test_report_state_remote_error_handling(self): + # test error handling using remote conductor + self.flags(use_local=False, group='conductor') + self._test_report_state_error(messaging.RemoteError) + + def test_report_state_remote_error_handling_timeout(self): # test error handling using remote conductor self.flags(use_local=False, group='conductor') self._test_report_state_error(messaging.MessagingTimeout) + def test_report_state_remote_unexpected_error(self): + # unexpected errors must be handled, but disconnected flag not touched + self.flags(use_local=False, group='conductor') + self._test_report_state_error(RuntimeError) + def test_report_state_local_error_handling(self): # if using local conductor, the db driver must handle DB errors self.flags(use_local=True, group='conductor') @@ -109,3 +121,8 @@ class DBServiceGroupTestCase(test.NoDBTestCase): # mock an oslo.db DBError as it's an exception base class for # oslo.db DB errors (eg DBConnectionError) self._test_report_state_error(db_exception.DBError) + + def test_report_state_local_unexpected_error(self): + # unexpected errors must be handled, but disconnected flag not touched + self.flags(use_local=True, group='conductor') + self._test_report_state_error(RuntimeError)