Add OSprofiler integration into Heat

This adds built-in RPC and DB traces to Heat, as well as traces on
some top-level stack methods to aid in reading the output.

A 'profiler' config group is added to enable profiling.

Change-Id: Ie5c1c8f1931f59e4d4bcf1ec3b791f55984eb6d2
Closes-bug: #1363782
Angus Salkeld 2014-09-18 11:29:58 +10:00
parent 8795f4d80f
commit a98d2c9c5d
15 changed files with 179 additions and 7 deletions

View File

@ -37,6 +37,7 @@ from oslo import i18n
from heat.common import config
from heat.common.i18n import _
from heat.common import messaging
from heat.common import profiler
from heat.common import wsgi
from heat.openstack.common import log as logging
from heat.openstack.common import systemd
@ -57,6 +58,7 @@ if __name__ == '__main__':
host = cfg.CONF.heat_api.bind_host
LOG.info(_('Starting Heat ReST API on %(host)s:%(port)s'),
{'host': host, 'port': port})
profiler.setup('heat-api', host)
server = wsgi.Server()
server.start(app, cfg.CONF.heat_api, default_port=port)
systemd.notify_once()

View File

@ -39,6 +39,7 @@ from oslo import i18n
from heat.common import config
from heat.common.i18n import _
from heat.common import messaging
from heat.common import profiler
from heat.common import wsgi
from heat.openstack.common import log as logging
from heat.openstack.common import systemd
@ -59,6 +60,7 @@ if __name__ == '__main__':
host = cfg.CONF.heat_api_cfn.bind_host
LOG.info(_('Starting Heat API on %(host)s:%(port)s'),
{'host': host, 'port': port})
profiler.setup('heat-api-cfn', host)
server = wsgi.Server()
server.start(app, cfg.CONF.heat_api_cfn, default_port=port)
systemd.notify_once()

View File

@ -39,6 +39,7 @@ from oslo import i18n
from heat.common import config
from heat.common.i18n import _
from heat.common import messaging
from heat.common import profiler
from heat.common import wsgi
from heat.openstack.common import log as logging
from heat.openstack.common import systemd
@ -59,6 +60,7 @@ if __name__ == '__main__':
host = cfg.CONF.heat_api_cloudwatch.bind_host
LOG.info(_('Starting Heat CloudWatch API on %(host)s:%(port)s'),
{'host': host, 'port': port})
profiler.setup('heat-api-cloudwatch', host)
server = wsgi.Server()
server.start(app, cfg.CONF.heat_api_cloudwatch, default_port=port)
systemd.notify_once()

View File

@ -36,6 +36,7 @@ from oslo.config import cfg
from oslo import i18n
from heat.common import messaging
from heat.common import profiler
from heat.openstack.common import log as logging
from heat.openstack.common import service
@ -53,6 +54,7 @@ if __name__ == '__main__':
from heat.engine import service as engine
profiler.setup('heat-engine', cfg.CONF.host)
srv = engine.EngineService(cfg.CONF.host, rpc_api.ENGINE_TOPIC)
launcher = service.launch(srv, workers=cfg.CONF.num_engine_workers)
if cfg.CONF.enable_cloud_watch_lite:

View File

@ -41,6 +41,32 @@ Adding the following lines to your `localrc` file will enable the ceilometer ser
enable_service ceilometer-acompute ceilometer-acentral ceilometer-collector ceilometer-api
enable_service ceilometer-alarm-notifier ceilometer-alarm-evaluator

Configure Devstack to enable OSprofiler
---------------------------------------

Add the profiler notifier to your Ceilometer config::

    CEILOMETER_NOTIFICATION_TOPICS=notifications,profiler

Enable the profiler in /etc/heat/heat.conf::

    $ echo -e "[profiler]\nprofiler_enabled = True\ntrace_sqlalchemy = True\n" >> /etc/heat/heat.conf

Change the default hmac_key in /etc/heat/api-paste.ini::

    $ sed -i "s/hmac_keys =.*/hmac_keys = SECRET_KEY/" /etc/heat/api-paste.ini

Run any command with --profile SECRET_KEY::

    $ heat --profile SECRET_KEY stack-list
    # this will print a <Trace ID>

Get pretty HTML with traces::

    $ osprofiler trace show --html <Trace ID>

Note that the osprofiler command should be run with the admin user name & tenant.

Confirming Heat is responding
-----------------------------

View File

@ -1,7 +1,7 @@
# heat-api pipeline
[pipeline:heat-api]
pipeline = request_id faultwrap ssl versionnegotiation authurl authtoken context apiv1app
pipeline = request_id faultwrap ssl versionnegotiation osprofiler authurl authtoken context apiv1app
# heat-api pipeline for standalone heat
# ie. uses alternative auth backend that authenticates users against keystone
@ -24,7 +24,7 @@ pipeline = request_id faultwrap versionnegotiation context custombackendauth api
# heat-api-cfn pipeline
[pipeline:heat-api-cfn]
pipeline = cfnversionnegotiation ec2authtoken authtoken context apicfnv1app
pipeline = cfnversionnegotiation osprofiler ec2authtoken authtoken context apicfnv1app
# heat-api-cfn pipeline for standalone heat
# relies exclusively on authenticating with ec2 signed requests
@ -33,7 +33,7 @@ pipeline = cfnversionnegotiation ec2authtoken context apicfnv1app
# heat-api-cloudwatch pipeline
[pipeline:heat-api-cloudwatch]
pipeline = versionnegotiation ec2authtoken authtoken context apicwapp
pipeline = versionnegotiation osprofiler ec2authtoken authtoken context apicwapp
# heat-api-cloudwatch pipeline for standalone heat
# relies exclusively on authenticating with ec2 signed requests
@ -97,3 +97,8 @@ paste.filter_factory = heat.common.custom_backend_auth:filter_factory
# Middleware to set x-openstack-request-id in http response header
[filter:request_id]
paste.filter_factory = heat.openstack.common.middleware.request_id:RequestIdMiddleware.factory
[filter:osprofiler]
paste.filter_factory = osprofiler.web:WsgiMiddleware.factory
hmac_keys = SECRET_KEY
enabled = yes
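For illustration only (a hedged sketch, not part of the patch; the keyword arguments mirror the ini options above and assume the osprofiler 0.3 API), the paste filter is roughly equivalent to wrapping the Heat API WSGI application with osprofiler's middleware by hand:

    # Sketch: manual equivalent of the [filter:osprofiler] section above.
    # The middleware starts a trace only when the incoming request carries
    # profiling headers signed with one of the configured hmac_keys.
    import osprofiler.web

    def wrap_heat_api(app):
        # 'app' stands in for the Heat API application built by paste.deploy
        return osprofiler.web.WsgiMiddleware(app, hmac_keys="SECRET_KEY",
                                             enabled=True)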

View File

@ -1270,6 +1270,19 @@
#api_paste_config=api-paste.ini
[profiler]
#
# Options defined in heat.common.config
#
# If False fully disable profiling feature. (boolean value)
#profiler_enabled=false
# If False do not trace SQL requests. (boolean value)
#trace_sqlalchemy=false
[revision]
#

View File

@ -156,6 +156,14 @@ rpc_opts = [
'This can be an opaque identifier. '
'It is not necessarily a hostname, FQDN, or IP address.')]
profiler_group = cfg.OptGroup('profiler')
profiler_opts = [
cfg.BoolOpt("profiler_enabled", default=False,
help=_('If False fully disable profiling feature.')),
cfg.BoolOpt("trace_sqlalchemy", default=False,
help=_("If False do not trace SQL requests."))
]
auth_password_group = cfg.OptGroup('auth_password')
auth_password_opts = [
cfg.BoolOpt('multi_cloud',
@ -211,6 +219,7 @@ def list_opts():
yield paste_deploy_group.name, paste_deploy_opts
yield auth_password_group.name, auth_password_opts
yield revision_group.name, revision_opts
yield profiler_group.name, profiler_opts
yield 'clients', clients_opts
for client in ('nova', 'swift', 'neutron', 'cinder',
@ -226,6 +235,7 @@ def list_opts():
cfg.CONF.register_group(paste_deploy_group)
cfg.CONF.register_group(auth_password_group)
cfg.CONF.register_group(revision_group)
cfg.CONF.register_group(profiler_group)
for group, opts in list_opts():
cfg.CONF.register_opts(opts, group=group)
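As a usage sketch (not code from the patch), once the [profiler] group is registered above, any Heat module can read the new options through oslo.config's global CONF object, which is what heat/common/profiler.py and heat/db/sqlalchemy/api.py do further down:

    # Hedged sketch of consuming the options registered above.
    from oslo.config import cfg

    cfg.CONF.import_group('profiler', 'heat.common.config')

    def sql_tracing_wanted():
        # Both options default to False, so tracing stays off unless the
        # operator opts in via heat.conf.
        return (cfg.CONF.profiler.profiler_enabled and
                cfg.CONF.profiler.trace_sqlalchemy)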

View File

@ -19,6 +19,7 @@ import eventlet
from oslo.config import cfg
import oslo.messaging
from oslo.serialization import jsonutils
from osprofiler import profiler
from heat.common import context
@ -49,10 +50,22 @@ class RequestContextSerializer(oslo.messaging.Serializer):
@staticmethod
def serialize_context(ctxt):
return ctxt.to_dict()
_context = ctxt.to_dict()
prof = profiler.get()
if prof:
trace_info = {
"hmac_key": prof.hmac_key,
"base_id": prof.get_base_id(),
"parent_id": prof.get_id()
}
_context.update({"trace_info": trace_info})
return _context
@staticmethod
def deserialize_context(ctxt):
trace_info = ctxt.pop("trace_info", None)
if trace_info:
profiler.init(**trace_info)
return context.RequestContext.from_dict(ctxt)
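A minimal sketch of how the trace context above round-trips across an RPC boundary (the ids and hmac key are placeholders): the caller embeds its profiler state in the serialized context, and the callee re-initialises its thread-local profiler from that state so engine-side spans join the same trace:

    from osprofiler import profiler

    # Caller side: a profiler was started for this request (normally by the
    # osprofiler WSGI middleware); capture its state next to the context.
    profiler.init(hmac_key="SECRET_KEY", base_id="base-1", parent_id="span-1")
    ctxt = {"user": "demo"}  # stand-in for ctxt.to_dict()
    prof = profiler.get()
    if prof:
        ctxt["trace_info"] = {"hmac_key": prof.hmac_key,
                              "base_id": prof.get_base_id(),
                              "parent_id": prof.get_id()}

    # Callee side: restore the profiler before building the RequestContext.
    trace_info = ctxt.pop("trace_info", None)
    if trace_info:
        profiler.init(**trace_info)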

heat/common/profiler.py (new file, 44 lines added)
View File

@ -0,0 +1,44 @@
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo.config import cfg
from oslo import messaging
import osprofiler.notifier
import osprofiler.profiler
import osprofiler.web
from heat.common import context
from heat.common import messaging as rpc_messaging
from heat.openstack.common import log as logging
cfg.CONF.import_opt('profiler_enabled', 'heat.common.config', group='profiler')
LOG = logging.getLogger(__name__)
def setup(binary, host):
if cfg.CONF.profiler.profiler_enabled:
_notifier = osprofiler.notifier.create(
"Messaging", messaging, context.get_admin_context().to_dict(),
rpc_messaging.TRANSPORT, "heat", binary, host)
osprofiler.notifier.set(_notifier)
LOG.warning("OSProfiler is enabled.\nIt means that person who knows "
"any of hmac_keys that are specified in "
"/etc/heat/api-paste.ini can trace his requests. \n"
"In real life only operator can read this file so there "
"is no security issue. Note that even if person can "
"trigger profiler, only admin user can retrieve trace "
"information.\n"
"To disable OSprofiler set in heat.conf:\n"
"[profiler]\nenabled=false")
else:
osprofiler.web.disable()

View File

@ -19,6 +19,7 @@ import sys
from oslo.config import cfg
from oslo.db.sqlalchemy import session as db_session
from oslo.db.sqlalchemy import utils
import osprofiler.sqlalchemy
import sqlalchemy
from sqlalchemy import orm
from sqlalchemy.orm.session import Session
@ -33,6 +34,7 @@ from heat.rpc import api as rpc_api
CONF = cfg.CONF
CONF.import_opt('max_events_per_stack', 'heat.common.config')
CONF.import_group('profiler', 'heat.common.config')
_facade = None
@ -42,6 +44,12 @@ def get_facade():
if not _facade:
_facade = db_session.EngineFacade.from_config(CONF)
if CONF.profiler.profiler_enabled:
if CONF.profiler.trace_sqlalchemy:
osprofiler.sqlalchemy.add_tracing(sqlalchemy,
_facade.get_engine(),
"db")
return _facade
get_engine = lambda: get_facade().get_engine()
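For illustration (a sketch using an in-memory SQLite engine, not part of the patch), the call added above hooks osprofiler into SQLAlchemy's event system so every statement executed on the engine is reported as a "db" trace point when a profiler is active:

    import osprofiler.sqlalchemy
    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite://")  # stand-in for the facade's engine
    # Same call as in get_facade(): statements now emit db-start/db-stop
    # events whenever a profiler has been initialised for the current thread.
    osprofiler.sqlalchemy.add_tracing(sqlalchemy, engine, "db")
    engine.execute("SELECT 1")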

View File

@ -21,6 +21,7 @@ from oslo.config import cfg
from oslo import messaging
from oslo.serialization import jsonutils
from oslo.utils import timeutils
from osprofiler import profiler
import requests
import six
import warnings
@ -93,13 +94,31 @@ class ThreadGroupManager(object):
"""
pass
def _serialize_profile_info(self):
prof = profiler.get()
trace_info = None
if prof:
trace_info = {
"hmac_key": prof.hmac_key,
"base_id": prof.get_base_id(),
"parent_id": prof.get_id()
}
return trace_info
def _start_with_trace(self, trace, func, *args, **kwargs):
if trace:
profiler.init(**trace)
return func(*args, **kwargs)
def start(self, stack_id, func, *args, **kwargs):
"""
Run the given method in a sub-thread.
"""
if stack_id not in self.groups:
self.groups[stack_id] = threadgroup.ThreadGroup()
return self.groups[stack_id].add_thread(func, *args, **kwargs)
return self.groups[stack_id].add_thread(self._start_with_trace,
self._serialize_profile_info(),
func, *args, **kwargs)
def start_with_lock(self, cnxt, stack, engine_id, func, *args, **kwargs):
"""
@ -268,6 +287,7 @@ class StackWatch(object):
self.check_stack_watches(sid)
@profiler.trace_cls("rpc")
class EngineListener(service.Service):
'''
Listen on an AMQP queue named for the engine. Allows individual
@ -305,6 +325,7 @@ class EngineListener(service.Service):
self.thread_group_mgr.send(stack_id, message)
@profiler.trace_cls("rpc")
class EngineService(service.Service):
"""
Manages the running instances from creation to destruction.

View File

@ -19,6 +19,7 @@ import warnings
from oslo.config import cfg
from oslo.utils import encodeutils
from osprofiler import profiler
import six
from heat.common import context as common_context
@ -282,6 +283,7 @@ class Stack(collections.Mapping):
use_stored_context=use_stored_context,
username=stack.username)
@profiler.trace('Stack.store', hide_args=False)
def store(self, backup=False):
'''
Store the stack in the database and return its ID
@ -426,6 +428,7 @@ class Stack(collections.Mapping):
handler = self._access_allowed_handlers.get(credential_id)
return handler and handler(resource_name)
@profiler.trace('Stack.validate', hide_args=False)
def validate(self):
'''
Validates the template.
@ -479,6 +482,7 @@ class Stack(collections.Mapping):
'''
return any(res.requires_deferred_auth for res in self.values())
@profiler.trace('Stack.state_set', hide_args=False)
def state_set(self, action, status, reason):
'''Update the stack state in the database.'''
if action not in self.ACTIONS:
@ -527,6 +531,7 @@ class Stack(collections.Mapping):
return [resource.preview()
for resource in self.resources.itervalues()]
@profiler.trace('Stack.create', hide_args=False)
def create(self):
'''
Create the stack and all of the resources.
@ -607,6 +612,7 @@ class Stack(collections.Mapping):
lifecycle_plugin_utils.do_post_ops(self.context, self, None, action,
(self.status == self.FAILED))
@profiler.trace('Stack.check', hide_args=False)
def check(self):
self.updated_time = datetime.utcnow()
checker = scheduler.TaskRunner(self.stack_task, self.CHECK,
@ -631,6 +637,7 @@ class Stack(collections.Mapping):
return all(supported)
@profiler.trace('Stack._backup_stack', hide_args=False)
def _backup_stack(self, create_if_missing=True):
'''
Get a Stack containing any in-progress resources from the previous
@ -652,6 +659,7 @@ class Stack(collections.Mapping):
else:
return None
@profiler.trace('Stack.adopt', hide_args=False)
def adopt(self):
'''
Adopt a stack (create stack with all the existing resources).
@ -668,6 +676,7 @@ class Stack(collections.Mapping):
post_func=rollback)
creator(timeout=self.timeout_secs())
@profiler.trace('Stack.update', hide_args=False)
def update(self, newstack, event=None):
'''
Compare the current stack with newstack,
@ -795,6 +804,7 @@ class Stack(collections.Mapping):
notification.send(self)
@profiler.trace('Stack.delete', hide_args=False)
def delete(self, action=DELETE, backup=False, abandon=False):
'''
Delete all of the resources, and then the stack itself.
@ -969,6 +979,7 @@ class Stack(collections.Mapping):
"%s ") % self.id)
self.id = None
@profiler.trace('Stack.suspend', hide_args=False)
def suspend(self):
'''
Suspend the stack, which invokes handle_suspend for all stack resources
@ -988,6 +999,7 @@ class Stack(collections.Mapping):
reverse=True)
sus_task(timeout=self.timeout_secs())
@profiler.trace('Stack.resume', hide_args=False)
def resume(self):
'''
Resume the stack, which invokes handle_resume for all stack resources
@ -1007,6 +1019,7 @@ class Stack(collections.Mapping):
reverse=False)
sus_task(timeout=self.timeout_secs())
@profiler.trace('Stack.snapshot', hide_args=False)
def snapshot(self):
'''Snapshot the stack, invoking handle_snapshot on all resources.'''
sus_task = scheduler.TaskRunner(self.stack_task,
@ -1014,12 +1027,14 @@ class Stack(collections.Mapping):
reverse=False)
sus_task(timeout=self.timeout_secs())
@profiler.trace('Stack.delete_snapshot', hide_args=False)
def delete_snapshot(self, snapshot):
'''Remove a snapshot from the backends.'''
for name, rsrc in self.resources.iteritems():
data = snapshot.data['resources'].get(name)
scheduler.TaskRunner(rsrc.delete_snapshot, data)()
@profiler.trace('Stack.output', hide_args=False)
def output(self, key):
'''
Get the value of the specified stack output.
@ -1074,11 +1089,13 @@ class Stack(collections.Mapping):
self.stack_user_project_id = project_id
self.store()
@profiler.trace('Stack.create_stack_user_project_id', hide_args=False)
def create_stack_user_project_id(self):
project_id = self.clients.client(
'keystone').create_stack_domain_project(self.id)
self.set_stack_user_project_id(project_id)
@profiler.trace('Stack.prepare_abandon', hide_args=False)
def prepare_abandon(self):
return {
'name': self.name,

View File

@ -353,6 +353,11 @@ class DummyThreadGroup(object):
pass
def add_thread(self, callback, *args, **kwargs):
# just to make _start_with_trace() easier to test:
# callback == _start_with_trace
# args[0] == trace_info
# args[1] == actual_callback
callback = args[1]
self.threads.append(callback)
return DummyThread()
@ -3459,8 +3464,9 @@ class ThreadGroupManagerTest(HeatTestCase):
ret = thm.start(stack_id, self.f, *self.fargs, **self.fkwargs)
self.assertEqual(self.tg_mock, thm.groups['test'])
self.tg_mock.add_thread.assert_called_with(self.f, *self.fargs,
**self.fkwargs)
self.tg_mock.add_thread.assert_called_with(
thm._start_with_trace, None,
self.f, *self.fargs, **self.fkwargs)
self.assertEqual(self.tg_mock.add_thread(), ret)
def test_tgm_add_timer(self):

View File

@ -19,6 +19,7 @@ oslo.i18n>=1.0.0 # Apache-2.0
oslo.messaging>=1.4.0
oslo.serialization>=1.0.0 # Apache-2.0
oslo.utils>=1.0.0 # Apache-2.0
osprofiler>=0.3.0
PasteDeploy>=1.5.0
posix_ipc
pycrypto>=2.6