Merge "servicegroup: stop zombie service due to exception"
This commit is contained in:
commit
0b69e8b863
@ -14,20 +14,18 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
from oslo_db import exception as db_exception
|
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
import oslo_messaging as messaging
|
import oslo_messaging as messaging
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
import six
|
import six
|
||||||
|
|
||||||
from nova.i18n import _, _LI, _LW
|
from nova.i18n import _, _LI, _LW, _LE
|
||||||
from nova.servicegroup import api
|
from nova.servicegroup import api
|
||||||
from nova.servicegroup.drivers import base
|
from nova.servicegroup.drivers import base
|
||||||
|
|
||||||
|
|
||||||
CONF = cfg.CONF
|
CONF = cfg.CONF
|
||||||
CONF.import_opt('service_down_time', 'nova.service')
|
CONF.import_opt('service_down_time', 'nova.service')
|
||||||
CONF.import_opt('use_local', 'nova.conductor.api', group='conductor')
|
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -85,13 +83,6 @@ class DbDriver(base.Driver):
|
|||||||
def _report_state(self, service):
|
def _report_state(self, service):
|
||||||
"""Update the state of this service in the datastore."""
|
"""Update the state of this service in the datastore."""
|
||||||
|
|
||||||
if CONF.conductor.use_local:
|
|
||||||
# need to catch DB type errors
|
|
||||||
exc_cls = db_exception.DBError # oslo.db exception base class
|
|
||||||
else:
|
|
||||||
# need to catch messaging timeouts
|
|
||||||
exc_cls = messaging.MessagingTimeout
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
service.service_ref.report_count += 1
|
service.service_ref.report_count += 1
|
||||||
service.service_ref.save()
|
service.service_ref.save()
|
||||||
@ -100,12 +91,20 @@ class DbDriver(base.Driver):
|
|||||||
if getattr(service, 'model_disconnected', False):
|
if getattr(service, 'model_disconnected', False):
|
||||||
service.model_disconnected = False
|
service.model_disconnected = False
|
||||||
LOG.info(
|
LOG.info(
|
||||||
_LI('Recovered connection to nova-conductor '
|
_LI('Recovered from being unable to report status.'))
|
||||||
'for reporting service status.'))
|
except messaging.MessagingTimeout:
|
||||||
|
# NOTE(johngarbutt) during upgrade we will see messaging timeouts
|
||||||
# the type of failure depends on use of remote or local conductor
|
# as nova-conductor is restarted, so only log this error once.
|
||||||
except exc_cls:
|
|
||||||
if not getattr(service, 'model_disconnected', False):
|
if not getattr(service, 'model_disconnected', False):
|
||||||
service.model_disconnected = True
|
service.model_disconnected = True
|
||||||
LOG.warn(_LW('Lost connection to nova-conductor '
|
LOG.warn(_LW('Lost connection to nova-conductor '
|
||||||
'for reporting service status.'))
|
'for reporting service status.'))
|
||||||
|
except Exception:
|
||||||
|
# NOTE(rpodolyaka): we'd like to avoid catching all possible
|
||||||
|
# exceptions here, but otherwise it would become possible for
|
||||||
|
# the state reporting thread to stop abruptly, and thus leave
|
||||||
|
# the service unusable until it's restarted.
|
||||||
|
LOG.exception(
|
||||||
|
_LE('Unexpected error while reporting service status'))
|
||||||
|
# trigger the recovery log message, if this error goes away
|
||||||
|
service.model_disconnected = True
|
||||||
|
@ -86,6 +86,7 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
|
|||||||
fn(service)
|
fn(service)
|
||||||
upd_mock.assert_called_once_with()
|
upd_mock.assert_called_once_with()
|
||||||
self.assertEqual(11, service_ref.report_count)
|
self.assertEqual(11, service_ref.report_count)
|
||||||
|
self.assertFalse(service.model_disconnected)
|
||||||
|
|
||||||
@mock.patch.object(objects.Service, 'save')
|
@mock.patch.object(objects.Service, 'save')
|
||||||
def _test_report_state_error(self, exc_cls, upd_mock):
|
def _test_report_state_error(self, exc_cls, upd_mock):
|
||||||
@ -96,12 +97,23 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
|
|||||||
service_ref=service_ref)
|
service_ref=service_ref)
|
||||||
fn = self.servicegroup_api._driver._report_state
|
fn = self.servicegroup_api._driver._report_state
|
||||||
fn(service) # fail if exception not caught
|
fn(service) # fail if exception not caught
|
||||||
|
self.assertTrue(service.model_disconnected)
|
||||||
|
|
||||||
def test_report_state_remote_error_handling(self):
|
def test_report_state_remote_error_handling(self):
|
||||||
|
# test error handling using remote conductor
|
||||||
|
self.flags(use_local=False, group='conductor')
|
||||||
|
self._test_report_state_error(messaging.RemoteError)
|
||||||
|
|
||||||
|
def test_report_state_remote_error_handling_timeout(self):
|
||||||
# test error handling using remote conductor
|
# test error handling using remote conductor
|
||||||
self.flags(use_local=False, group='conductor')
|
self.flags(use_local=False, group='conductor')
|
||||||
self._test_report_state_error(messaging.MessagingTimeout)
|
self._test_report_state_error(messaging.MessagingTimeout)
|
||||||
|
|
||||||
|
def test_report_state_remote_unexpected_error(self):
|
||||||
|
# unexpected errors must be handled, and the disconnected flag is set
|
||||||
|
self.flags(use_local=False, group='conductor')
|
||||||
|
self._test_report_state_error(RuntimeError)
|
||||||
|
|
||||||
def test_report_state_local_error_handling(self):
|
def test_report_state_local_error_handling(self):
|
||||||
# if using local conductor, the db driver must handle DB errors
|
# if using local conductor, the db driver must handle DB errors
|
||||||
self.flags(use_local=True, group='conductor')
|
self.flags(use_local=True, group='conductor')
|
||||||
@ -109,3 +121,8 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
|
|||||||
# mock an oslo.db DBError as it's an exception base class for
|
# mock an oslo.db DBError as it's an exception base class for
|
||||||
# oslo.db DB errors (eg DBConnectionError)
|
# oslo.db DB errors (eg DBConnectionError)
|
||||||
self._test_report_state_error(db_exception.DBError)
|
self._test_report_state_error(db_exception.DBError)
|
||||||
|
|
||||||
|
def test_report_state_local_unexpected_error(self):
|
||||||
|
# unexpected errors must be handled, and the disconnected flag is set
|
||||||
|
self.flags(use_local=True, group='conductor')
|
||||||
|
self._test_report_state_error(RuntimeError)
|
||||||
|
Loading…
Reference in New Issue
Block a user