Add timeout to all http requests

Currently, we generate lots of samples by polling data from other services,
but these REST requests have no timeout. We have observed that some
requests (for example, to keystone, due to an openssl problem) may get stuck
for several days (maybe forever if we don't restart the service). Other
pollsters in the same thread cannot work either. Worst of all, when the
external (keystone) service returns to normal, Ceilometer cannot recover
automatically; the cloud operator needs to restart it manually.

So I strongly suggest that we add a timeout to **every** REST API call;
this is quite important for improving Ceilometer's robustness and
reliability.

This patch adds a new option named http_timeout and applies it to almost
all HTTP requests in the Ceilometer project.

Change-Id: I76df2c0a9ffacb252e15edbb125e37ccb2aac4aa
Closes-Bug: #1388778
This commit is contained in:
ZhiQiang Fan 2014-11-03 20:57:04 +08:00 committed by ZhiQiang Fan
parent 2117ba760c
commit bd0244ffe6
15 changed files with 38 additions and 8 deletions

View File

@ -36,6 +36,7 @@ UNKNOWN = 'insufficient data'
OK = 'ok'
ALARM = 'alarm'
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
@ -61,6 +62,7 @@ class Evaluator(object):
os_cacert=auth_config.os_cacert,
os_endpoint_type=auth_config.os_endpoint_type,
insecure=auth_config.insecure,
timeout=cfg.CONF.http_timeout,
)
self.api_client = ceiloclient.get_client(2, **creds)
return self.api_client

View File

@ -21,6 +21,10 @@ from six.moves.urllib import parse
from ceilometer.alarm.notifier import rest
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
class TrustRestAlarmNotifier(rest.RestAlarmNotifier):
"""Notifier supporting keystone trust authentication.
@ -44,6 +48,7 @@ class TrustRestAlarmNotifier(rest.RestAlarmNotifier):
auth_url=auth_url,
region_name=cfg.CONF.service_credentials.os_region_name,
insecure=cfg.CONF.service_credentials.insecure,
timeout=cfg.CONF.http_timeout,
trust_id=trust_id)
# Remove the fake user

View File

@ -51,6 +51,8 @@ cfg.CONF.import_opt('partition_rpc_topic', 'ceilometer.alarm.rpc',
group='alarm')
cfg.CONF.import_opt('heartbeat', 'ceilometer.coordination',
group='coordination')
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -88,6 +90,7 @@ class AlarmService(object):
os_cacert=auth_config.os_cacert,
os_endpoint_type=auth_config.os_endpoint_type,
insecure=auth_config.insecure,
timeout=cfg.CONF.http_timeout,
)
self.api_client = ceiloclient.get_client(2, **creds)
return self.api_client

View File

@ -31,6 +31,7 @@ OPTS = [
'subset of pollsters should be loaded.'),
]
cfg.CONF.register_opts(OPTS, group='central')
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -52,7 +53,8 @@ class AgentManager(agent.AgentManager):
cacert=cfg.CONF.service_credentials.os_cacert,
auth_url=cfg.CONF.service_credentials.os_auth_url,
region_name=cfg.CONF.service_credentials.os_region_name,
insecure=cfg.CONF.service_credentials.insecure)
insecure=cfg.CONF.service_credentials.insecure,
timeout=cfg.CONF.http_timeout,)
except Exception as e:
self.keystone = e

View File

@ -23,6 +23,7 @@ from ceilometer.openstack.common.gettextutils import _
from ceilometer.openstack.common import log
from ceilometer import plugin
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -42,7 +43,8 @@ def _get_keystone():
cacert=cfg.CONF.service_credentials.os_cacert,
auth_url=cfg.CONF.service_credentials.os_auth_url,
region_name=cfg.CONF.service_credentials.os_region_name,
insecure=cfg.CONF.service_credentials.insecure)
insecure=cfg.CONF.service_credentials.insecure,
timeout=cfg.CONF.http_timeout)
except Exception as e:
return e

View File

@ -52,7 +52,8 @@ class KwapiClient(object):
headers = {}
if self.token is not None:
headers = {'X-Auth-Token': self.token}
request = requests.get(probes_url, headers=headers)
timeout = cfg.CONF.http_timeout
request = requests.get(probes_url, headers=headers, timeout=timeout)
message = request.json()
probes = message['probes']
for key, value in six.iteritems(probes):

View File

@ -62,7 +62,8 @@ class _Base(plugin.CentralPollster):
return glanceclient.Client('1', endpoint,
token=ksclient.auth_token,
cacert=service_credentials.os_cacert,
insecure=service_credentials.insecure)
insecure=service_credentials.insecure,
timeout=cfg.CONF.http_timeout)
def _get_images(self, ksclient, endpoint):
client = self.get_glance_client(ksclient, endpoint)

View File

@ -24,6 +24,7 @@ from ceilometer.openstack.common import log
CONF = cfg.CONF
CONF.import_opt('http_timeout', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -98,7 +99,8 @@ class AnalyticsAPIBaseClient(object):
'data': data,
'verify': self.verify_ssl,
'allow_redirects': False,
'cookies': cookies
'cookies': cookies,
'timeout': CONF.http_timeout,
}
return req_params

View File

@ -25,6 +25,7 @@ from ceilometer.openstack.common import log
CONF = cfg.CONF
CONF.import_opt('http_timeout', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -170,7 +171,8 @@ class Client():
req_params = {
'headers': {
'Accept': 'application/json'
}
},
'timeout': CONF.http_timeout,
}
auth_way = params.get('auth')

View File

@ -30,6 +30,7 @@ SERVICE_OPTS = [
]
cfg.CONF.register_opts(SERVICE_OPTS, group='service_types')
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -65,7 +66,8 @@ class Client(object):
'auth_url': conf.os_auth_url,
'region_name': conf.os_region_name,
'endpoint_type': conf.os_endpoint_type,
'service_type': cfg.CONF.service_types.neutron
'timeout': cfg.CONF.http_timeout,
'service_type': cfg.CONF.service_types.neutron,
}
if conf.os_tenant_id:

View File

@ -36,7 +36,7 @@ SERVICE_OPTS = [
cfg.CONF.register_opts(OPTS)
cfg.CONF.register_opts(SERVICE_OPTS, group='service_types')
cfg.CONF.import_opt('http_timeout', 'ceilometer.service')
cfg.CONF.import_group('service_credentials', 'ceilometer.service')
LOG = log.getLogger(__name__)
@ -74,6 +74,7 @@ class Client(object):
bypass_url=bypass_url,
cacert=conf.os_cacert,
insecure=conf.insecure,
timeout=cfg.CONF.http_timeout,
http_log_debug=cfg.CONF.nova_http_log_debug,
no_cache=True)

View File

@ -43,6 +43,10 @@ OPTS = [
default=1,
help='Number of workers for notification service. A single '
'notification agent is enabled by default.'),
cfg.IntOpt('http_timeout',
default=600,
help='Timeout seconds for HTTP requests. Set it to None to '
'disable timeout.'),
]
cfg.CONF.register_opts(OPTS)

View File

@ -360,6 +360,7 @@ class TestEvaluate(base.TestEvaluatorBase):
os_username=conf.os_username,
os_cacert=conf.os_cacert,
os_endpoint_type=conf.os_endpoint_type,
timeout=cfg.CONF.http_timeout,
insecure=conf.insecure)]
actual = client.call_args_list
self.assertEqual(expected, actual)

View File

@ -138,6 +138,7 @@ class TestAlarmEvaluationService(tests_base.BaseTestCase):
os_username=conf.os_username,
os_cacert=conf.os_cacert,
os_endpoint_type=conf.os_endpoint_type,
timeout=self.CONF.http_timeout,
insecure=conf.insecure)]
actual = client.call_args_list
self.assertEqual(expected, actual)

View File

@ -102,6 +102,7 @@ class TestSingletonAlarmService(tests_base.BaseTestCase):
os_username=conf.os_username,
os_cacert=conf.os_cacert,
os_endpoint_type=conf.os_endpoint_type,
timeout=self.CONF.http_timeout,
insecure=conf.insecure)]
actual = client.call_args_list
self.assertEqual(expected, actual)