From 197bb467c0fa33700e5397c934fa10d8c16f1fbc Mon Sep 17 00:00:00 2001 From: jichenjc Date: Sun, 21 Sep 2014 05:19:49 +0800 Subject: [PATCH] Use reasonable timeout for rpc service_update() nova.servicegroup.drivers.db.DbDriver._report_state() is called every service.report_interval seconds from a timer in order to periodically report the service state. It calls self.conductor_api.service_update(). If this ends up calling nova.conductor.rpcapi.ConductorAPI.service_update(), it will do an RPC call() to nova-conductor. If anything happens which causes the RPC reply to be lost or never sent in the first place, by default the RPC code will wait 60 seconds for a response (blocking the timer-based calling of _report_state() in the meantime). This is long enough to cause the status in the database to get old enough that other services consider this service to be "down". if rpc_reponse_timeout is smaller than report_interval then we could use the existing RPC timeout, but wait longer won't hurt. So the patch didn't check it and only use report_interval. Change-Id: I88743183bce1a534812cfe6110c3fc2892058c53 Closes-Bug: #1368989 --- nova/conductor/rpcapi.py | 15 ++++++++++++- nova/service.py | 2 +- nova/tests/conductor/test_conductor.py | 29 ++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/nova/conductor/rpcapi.py b/nova/conductor/rpcapi.py index 4a82e72ae0a7..5f1427edcdf8 100644 --- a/nova/conductor/rpcapi.py +++ b/nova/conductor/rpcapi.py @@ -276,7 +276,20 @@ class ConductorAPI(object): def service_update(self, context, service, values): service_p = jsonutils.to_primitive(service) - cctxt = self.client.prepare() + + # (NOTE:jichenjc)If we're calling this periodically, it makes no + # sense for the RPC timeout to be more than the service + # report interval. Select 5 here is only find a reaonable long + # interval as threshold. + timeout = CONF.report_interval + if timeout and timeout > 5: + timeout -= 1 + + if timeout: + cctxt = self.client.prepare(timeout=timeout) + else: + cctxt = self.client.prepare() + return cctxt.call(context, 'service_update', service=service_p, values=values) diff --git a/nova/service.py b/nova/service.py index cdb0b1f117e3..5f851fea325e 100644 --- a/nova/service.py +++ b/nova/service.py @@ -221,7 +221,7 @@ class Service(service.Service): 'host': self.host, 'binary': self.binary, 'topic': self.topic, - 'report_count': 0 + 'report_count': 0, } service = self.conductor_api.service_create(context, svc_values) self.service_id = service['id'] diff --git a/nova/tests/conductor/test_conductor.py b/nova/tests/conductor/test_conductor.py index 44c412bff006..599da303f5d6 100644 --- a/nova/tests/conductor/test_conductor.py +++ b/nova/tests/conductor/test_conductor.py @@ -19,6 +19,7 @@ import contextlib import mock import mox +from oslo.config import cfg from oslo import messaging from nova.api.ec2 import ec2utils @@ -59,6 +60,10 @@ from nova.tests import fake_utils from nova import utils +CONF = cfg.CONF +CONF.import_opt('report_interval', 'nova.service') + + FAKE_IMAGE_REF = 'fake-image-ref' @@ -863,6 +868,30 @@ class ConductorRPCAPITestCase(_BaseTestCase, test.TestCase): self.conductor.security_groups_trigger_handler(self.context, 'event', ['arg']) + @mock.patch.object(db, 'service_update') + @mock.patch('oslo.messaging.RPCClient.prepare') + def test_service_update_time_big(self, mock_prepare, mock_update): + CONF.set_override('report_interval', 10) + services = {'id': 1} + self.conductor.service_update(self.context, services, {}) + mock_prepare.assert_called_once_with(timeout=9) + + @mock.patch.object(db, 'service_update') + @mock.patch('oslo.messaging.RPCClient.prepare') + def test_service_update_time_small(self, mock_prepare, mock_update): + CONF.set_override('report_interval', 3) + services = {'id': 1} + self.conductor.service_update(self.context, services, {}) + mock_prepare.assert_called_once_with(timeout=3) + + @mock.patch.object(db, 'service_update') + @mock.patch('oslo.messaging.RPCClient.prepare') + def test_service_update_no_time(self, mock_prepare, mock_update): + CONF.set_override('report_interval', None) + services = {'id': 1} + self.conductor.service_update(self.context, services, {}) + mock_prepare.assert_called_once_with() + class ConductorAPITestCase(_BaseTestCase, test.TestCase): """Conductor API Tests."""