From a98d2c9c5df5179b207ec8d2df9be42312fab336 Mon Sep 17 00:00:00 2001 From: Angus Salkeld Date: Thu, 18 Sep 2014 11:29:58 +1000 Subject: [PATCH] Add OSprofiler integration into Heat This adds builtin rpc and db traces to Heat, as well as some toplevel stack methods to aid in reading the output. A 'profiler' config group is added to enable profiling. Change-Id: Ie5c1c8f1931f59e4d4bcf1ec3b791f55984eb6d2 Closes-bug: #1363782 --- bin/heat-api | 2 + bin/heat-api-cfn | 2 + bin/heat-api-cloudwatch | 2 + bin/heat-engine | 2 + doc/source/getting_started/on_devstack.rst | 26 +++++++++++++ etc/heat/api-paste.ini | 11 ++++-- etc/heat/heat.conf.sample | 13 +++++++ heat/common/config.py | 10 +++++ heat/common/messaging.py | 15 +++++++- heat/common/profiler.py | 44 ++++++++++++++++++++++ heat/db/sqlalchemy/api.py | 8 ++++ heat/engine/service.py | 23 ++++++++++- heat/engine/stack.py | 17 +++++++++ heat/tests/test_engine_service.py | 10 ++++- requirements.txt | 1 + 15 files changed, 179 insertions(+), 7 deletions(-) create mode 100644 heat/common/profiler.py diff --git a/bin/heat-api b/bin/heat-api index 18c619156..7444f666c 100755 --- a/bin/heat-api +++ b/bin/heat-api @@ -37,6 +37,7 @@ from oslo import i18n from heat.common import config from heat.common.i18n import _ from heat.common import messaging +from heat.common import profiler from heat.common import wsgi from heat.openstack.common import log as logging from heat.openstack.common import systemd @@ -57,6 +58,7 @@ if __name__ == '__main__': host = cfg.CONF.heat_api.bind_host LOG.info(_('Starting Heat ReST API on %(host)s:%(port)s'), {'host': host, 'port': port}) + profiler.setup('heat-api', host) server = wsgi.Server() server.start(app, cfg.CONF.heat_api, default_port=port) systemd.notify_once() diff --git a/bin/heat-api-cfn b/bin/heat-api-cfn index fa0dd60e6..b21080777 100755 --- a/bin/heat-api-cfn +++ b/bin/heat-api-cfn @@ -39,6 +39,7 @@ from oslo import i18n from heat.common import config from heat.common.i18n import _ from heat.common import messaging +from heat.common import profiler from heat.common import wsgi from heat.openstack.common import log as logging from heat.openstack.common import systemd @@ -59,6 +60,7 @@ if __name__ == '__main__': host = cfg.CONF.heat_api_cfn.bind_host LOG.info(_('Starting Heat API on %(host)s:%(port)s'), {'host': host, 'port': port}) + profiler.setup('heat-api-cfn', host) server = wsgi.Server() server.start(app, cfg.CONF.heat_api_cfn, default_port=port) systemd.notify_once() diff --git a/bin/heat-api-cloudwatch b/bin/heat-api-cloudwatch index c7d933a2b..fea213bc2 100755 --- a/bin/heat-api-cloudwatch +++ b/bin/heat-api-cloudwatch @@ -39,6 +39,7 @@ from oslo import i18n from heat.common import config from heat.common.i18n import _ from heat.common import messaging +from heat.common import profiler from heat.common import wsgi from heat.openstack.common import log as logging from heat.openstack.common import systemd @@ -59,6 +60,7 @@ if __name__ == '__main__': host = cfg.CONF.heat_api_cloudwatch.bind_host LOG.info(_('Starting Heat CloudWatch API on %(host)s:%(port)s'), {'host': host, 'port': port}) + profiler.setup('heat-api-cloudwatch', host) server = wsgi.Server() server.start(app, cfg.CONF.heat_api_cloudwatch, default_port=port) systemd.notify_once() diff --git a/bin/heat-engine b/bin/heat-engine index fbdcd1b38..52f5beabf 100755 --- a/bin/heat-engine +++ b/bin/heat-engine @@ -36,6 +36,7 @@ from oslo.config import cfg from oslo import i18n from heat.common import messaging +from heat.common import profiler from heat.openstack.common import log as logging from heat.openstack.common import service @@ -53,6 +54,7 @@ if __name__ == '__main__': from heat.engine import service as engine + profiler.setup('heat-engine', cfg.CONF.host) srv = engine.EngineService(cfg.CONF.host, rpc_api.ENGINE_TOPIC) launcher = service.launch(srv, workers=cfg.CONF.num_engine_workers) if cfg.CONF.enable_cloud_watch_lite: diff --git a/doc/source/getting_started/on_devstack.rst b/doc/source/getting_started/on_devstack.rst index ef37cf7d6..8c303b506 100644 --- a/doc/source/getting_started/on_devstack.rst +++ b/doc/source/getting_started/on_devstack.rst @@ -41,6 +41,32 @@ Adding the following lines to your `localrc` file will enable the ceilometer ser enable_service ceilometer-acompute ceilometer-acentral ceilometer-collector ceilometer-api enable_service ceilometer-alarm-notifier ceilometer-alarm-evaluator +Configure Devstack to enable OSprofiler +--------------------------------------- + +Add the profiler notifier to your Ceilometer to your config:: + + CEILOMETER_NOTIFICATION_TOPICS=notifications,profiler + +Enable the profiler in /etc/heat/heat.conf:: + + $ echo -e "[profiler]\nprofiler_enabled = True\ntrace_sqlalchemy = True\n" >> /etc/heat/heat.conf + +Change the default hmac_key in /etc/heat/api-paste.ini:: + + $ sed -i "s/hmac_keys =.*/hmac_keys = SECRET_KEY/" /etc/heat/api-paste.ini + +Run any command with --profile SECRET_KEY:: + + $ heat --profile SECRET_KEY stack-list + # it will print + +Get pretty HTML with traces:: + + $ osprofiler trace show --html + +Note that osprofiler should be run with the admin user name & tenant. + Confirming Heat is responding ----------------------------- diff --git a/etc/heat/api-paste.ini b/etc/heat/api-paste.ini index 224c4572f..42260da72 100644 --- a/etc/heat/api-paste.ini +++ b/etc/heat/api-paste.ini @@ -1,7 +1,7 @@ # heat-api pipeline [pipeline:heat-api] -pipeline = request_id faultwrap ssl versionnegotiation authurl authtoken context apiv1app +pipeline = request_id faultwrap ssl versionnegotiation osprofiler authurl authtoken context apiv1app # heat-api pipeline for standalone heat # ie. uses alternative auth backend that authenticates users against keystone @@ -24,7 +24,7 @@ pipeline = request_id faultwrap versionnegotiation context custombackendauth api # heat-api-cfn pipeline [pipeline:heat-api-cfn] -pipeline = cfnversionnegotiation ec2authtoken authtoken context apicfnv1app +pipeline = cfnversionnegotiation osprofiler ec2authtoken authtoken context apicfnv1app # heat-api-cfn pipeline for standalone heat # relies exclusively on authenticating with ec2 signed requests @@ -33,7 +33,7 @@ pipeline = cfnversionnegotiation ec2authtoken context apicfnv1app # heat-api-cloudwatch pipeline [pipeline:heat-api-cloudwatch] -pipeline = versionnegotiation ec2authtoken authtoken context apicwapp +pipeline = versionnegotiation osprofiler ec2authtoken authtoken context apicwapp # heat-api-cloudwatch pipeline for standalone heat # relies exclusively on authenticating with ec2 signed requests @@ -97,3 +97,8 @@ paste.filter_factory = heat.common.custom_backend_auth:filter_factory # Middleware to set x-openstack-request-id in http response header [filter:request_id] paste.filter_factory = heat.openstack.common.middleware.request_id:RequestIdMiddleware.factory + +[filter:osprofiler] +paste.filter_factory = osprofiler.web:WsgiMiddleware.factory +hmac_keys = SECRET_KEY +enabled = yes \ No newline at end of file diff --git a/etc/heat/heat.conf.sample b/etc/heat/heat.conf.sample index cce268340..5afda5c50 100644 --- a/etc/heat/heat.conf.sample +++ b/etc/heat/heat.conf.sample @@ -1270,6 +1270,19 @@ #api_paste_config=api-paste.ini +[profiler] + +# +# Options defined in heat.common.config +# + +# If False fully disable profiling feature. (boolean value) +#profiler_enabled=false + +# If False do not trace SQL requests. (boolean value) +#trace_sqlalchemy=false + + [revision] # diff --git a/heat/common/config.py b/heat/common/config.py index dfc96960d..42e89e462 100644 --- a/heat/common/config.py +++ b/heat/common/config.py @@ -156,6 +156,14 @@ rpc_opts = [ 'This can be an opaque identifier. ' 'It is not necessarily a hostname, FQDN, or IP address.')] +profiler_group = cfg.OptGroup('profiler') +profiler_opts = [ + cfg.BoolOpt("profiler_enabled", default=False, + help=_('If False fully disable profiling feature.')), + cfg.BoolOpt("trace_sqlalchemy", default=False, + help=_("If False do not trace SQL requests.")) +] + auth_password_group = cfg.OptGroup('auth_password') auth_password_opts = [ cfg.BoolOpt('multi_cloud', @@ -211,6 +219,7 @@ def list_opts(): yield paste_deploy_group.name, paste_deploy_opts yield auth_password_group.name, auth_password_opts yield revision_group.name, revision_opts + yield profiler_group.name, profiler_opts yield 'clients', clients_opts for client in ('nova', 'swift', 'neutron', 'cinder', @@ -226,6 +235,7 @@ def list_opts(): cfg.CONF.register_group(paste_deploy_group) cfg.CONF.register_group(auth_password_group) cfg.CONF.register_group(revision_group) +cfg.CONF.register_group(profiler_group) for group, opts in list_opts(): cfg.CONF.register_opts(opts, group=group) diff --git a/heat/common/messaging.py b/heat/common/messaging.py index bc6a33cb8..ca4b0f1f7 100644 --- a/heat/common/messaging.py +++ b/heat/common/messaging.py @@ -19,6 +19,7 @@ import eventlet from oslo.config import cfg import oslo.messaging from oslo.serialization import jsonutils +from osprofiler import profiler from heat.common import context @@ -49,10 +50,22 @@ class RequestContextSerializer(oslo.messaging.Serializer): @staticmethod def serialize_context(ctxt): - return ctxt.to_dict() + _context = ctxt.to_dict() + prof = profiler.get() + if prof: + trace_info = { + "hmac_key": prof.hmac_key, + "base_id": prof.get_base_id(), + "parent_id": prof.get_id() + } + _context.update({"trace_info": trace_info}) + return _context @staticmethod def deserialize_context(ctxt): + trace_info = ctxt.pop("trace_info", None) + if trace_info: + profiler.init(**trace_info) return context.RequestContext.from_dict(ctxt) diff --git a/heat/common/profiler.py b/heat/common/profiler.py new file mode 100644 index 000000000..bfdad9187 --- /dev/null +++ b/heat/common/profiler.py @@ -0,0 +1,44 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo.config import cfg +from oslo import messaging +import osprofiler.profiler +import osprofiler.web + +from heat.common import context +from heat.common import messaging as rpc_messaging +from heat.openstack.common import log as logging + +cfg.CONF.import_opt('profiler_enabled', 'heat.common.config', group='profiler') + +LOG = logging.getLogger(__name__) + + +def setup(binary, host): + if cfg.CONF.profiler.profiler_enabled: + _notifier = osprofiler.notifier.create( + "Messaging", messaging, context.get_admin_context().to_dict(), + rpc_messaging.TRANSPORT, "heat", binary, host) + osprofiler.notifier.set(_notifier) + LOG.warning("OSProfiler is enabled.\nIt means that person who knows " + "any of hmac_keys that are specified in " + "/etc/heat/api-paste.ini can trace his requests. \n" + "In real life only operator can read this file so there " + "is no security issue. Note that even if person can " + "trigger profiler, only admin user can retrieve trace " + "information.\n" + "To disable OSprofiler set in heat.conf:\n" + "[profiler]\nenabled=false") + else: + osprofiler.web.disable() diff --git a/heat/db/sqlalchemy/api.py b/heat/db/sqlalchemy/api.py index 09d218d69..3f8931a59 100644 --- a/heat/db/sqlalchemy/api.py +++ b/heat/db/sqlalchemy/api.py @@ -19,6 +19,7 @@ import sys from oslo.config import cfg from oslo.db.sqlalchemy import session as db_session from oslo.db.sqlalchemy import utils +import osprofiler.sqlalchemy import sqlalchemy from sqlalchemy import orm from sqlalchemy.orm.session import Session @@ -33,6 +34,7 @@ from heat.rpc import api as rpc_api CONF = cfg.CONF CONF.import_opt('max_events_per_stack', 'heat.common.config') +CONF.import_group('profiler', 'heat.common.config') _facade = None @@ -42,6 +44,12 @@ def get_facade(): if not _facade: _facade = db_session.EngineFacade.from_config(CONF) + if CONF.profiler.profiler_enabled: + if CONF.profiler.trace_sqlalchemy: + osprofiler.sqlalchemy.add_tracing(sqlalchemy, + _facade.get_engine(), + "db") + return _facade get_engine = lambda: get_facade().get_engine() diff --git a/heat/engine/service.py b/heat/engine/service.py index cde2e029e..a924b0d00 100644 --- a/heat/engine/service.py +++ b/heat/engine/service.py @@ -21,6 +21,7 @@ from oslo.config import cfg from oslo import messaging from oslo.serialization import jsonutils from oslo.utils import timeutils +from osprofiler import profiler import requests import six import warnings @@ -93,13 +94,31 @@ class ThreadGroupManager(object): """ pass + def _serialize_profile_info(self): + prof = profiler.get() + trace_info = None + if prof: + trace_info = { + "hmac_key": prof.hmac_key, + "base_id": prof.get_base_id(), + "parent_id": prof.get_id() + } + return trace_info + + def _start_with_trace(self, trace, func, *args, **kwargs): + if trace: + profiler.init(**trace) + return func(*args, **kwargs) + def start(self, stack_id, func, *args, **kwargs): """ Run the given method in a sub-thread. """ if stack_id not in self.groups: self.groups[stack_id] = threadgroup.ThreadGroup() - return self.groups[stack_id].add_thread(func, *args, **kwargs) + return self.groups[stack_id].add_thread(self._start_with_trace, + self._serialize_profile_info(), + func, *args, **kwargs) def start_with_lock(self, cnxt, stack, engine_id, func, *args, **kwargs): """ @@ -268,6 +287,7 @@ class StackWatch(object): self.check_stack_watches(sid) +@profiler.trace_cls("rpc") class EngineListener(service.Service): ''' Listen on an AMQP queue named for the engine. Allows individual @@ -305,6 +325,7 @@ class EngineListener(service.Service): self.thread_group_mgr.send(stack_id, message) +@profiler.trace_cls("rpc") class EngineService(service.Service): """ Manages the running instances from creation to destruction. diff --git a/heat/engine/stack.py b/heat/engine/stack.py index c7902a8ed..c4ec01e1b 100644 --- a/heat/engine/stack.py +++ b/heat/engine/stack.py @@ -19,6 +19,7 @@ import warnings from oslo.config import cfg from oslo.utils import encodeutils +from osprofiler import profiler import six from heat.common import context as common_context @@ -282,6 +283,7 @@ class Stack(collections.Mapping): use_stored_context=use_stored_context, username=stack.username) + @profiler.trace('Stack.store', hide_args=False) def store(self, backup=False): ''' Store the stack in the database and return its ID @@ -426,6 +428,7 @@ class Stack(collections.Mapping): handler = self._access_allowed_handlers.get(credential_id) return handler and handler(resource_name) + @profiler.trace('Stack.validate', hide_args=False) def validate(self): ''' Validates the template. @@ -479,6 +482,7 @@ class Stack(collections.Mapping): ''' return any(res.requires_deferred_auth for res in self.values()) + @profiler.trace('Stack.state_set', hide_args=False) def state_set(self, action, status, reason): '''Update the stack state in the database.''' if action not in self.ACTIONS: @@ -527,6 +531,7 @@ class Stack(collections.Mapping): return [resource.preview() for resource in self.resources.itervalues()] + @profiler.trace('Stack.create', hide_args=False) def create(self): ''' Create the stack and all of the resources. @@ -607,6 +612,7 @@ class Stack(collections.Mapping): lifecycle_plugin_utils.do_post_ops(self.context, self, None, action, (self.status == self.FAILED)) + @profiler.trace('Stack.check', hide_args=False) def check(self): self.updated_time = datetime.utcnow() checker = scheduler.TaskRunner(self.stack_task, self.CHECK, @@ -631,6 +637,7 @@ class Stack(collections.Mapping): return all(supported) + @profiler.trace('Stack._backup_stack', hide_args=False) def _backup_stack(self, create_if_missing=True): ''' Get a Stack containing any in-progress resources from the previous @@ -652,6 +659,7 @@ class Stack(collections.Mapping): else: return None + @profiler.trace('Stack.adopt', hide_args=False) def adopt(self): ''' Adopt a stack (create stack with all the existing resources). @@ -668,6 +676,7 @@ class Stack(collections.Mapping): post_func=rollback) creator(timeout=self.timeout_secs()) + @profiler.trace('Stack.update', hide_args=False) def update(self, newstack, event=None): ''' Compare the current stack with newstack, @@ -795,6 +804,7 @@ class Stack(collections.Mapping): notification.send(self) + @profiler.trace('Stack.delete', hide_args=False) def delete(self, action=DELETE, backup=False, abandon=False): ''' Delete all of the resources, and then the stack itself. @@ -969,6 +979,7 @@ class Stack(collections.Mapping): "%s ") % self.id) self.id = None + @profiler.trace('Stack.suspend', hide_args=False) def suspend(self): ''' Suspend the stack, which invokes handle_suspend for all stack resources @@ -988,6 +999,7 @@ class Stack(collections.Mapping): reverse=True) sus_task(timeout=self.timeout_secs()) + @profiler.trace('Stack.resume', hide_args=False) def resume(self): ''' Resume the stack, which invokes handle_resume for all stack resources @@ -1007,6 +1019,7 @@ class Stack(collections.Mapping): reverse=False) sus_task(timeout=self.timeout_secs()) + @profiler.trace('Stack.snapshot', hide_args=False) def snapshot(self): '''Snapshot the stack, invoking handle_snapshot on all resources.''' sus_task = scheduler.TaskRunner(self.stack_task, @@ -1014,12 +1027,14 @@ class Stack(collections.Mapping): reverse=False) sus_task(timeout=self.timeout_secs()) + @profiler.trace('Stack.delete_snapshot', hide_args=False) def delete_snapshot(self, snapshot): '''Remove a snapshot from the backends.''' for name, rsrc in self.resources.iteritems(): data = snapshot.data['resources'].get(name) scheduler.TaskRunner(rsrc.delete_snapshot, data)() + @profiler.trace('Stack.output', hide_args=False) def output(self, key): ''' Get the value of the specified stack output. @@ -1074,11 +1089,13 @@ class Stack(collections.Mapping): self.stack_user_project_id = project_id self.store() + @profiler.trace('Stack.create_stack_user_project_id', hide_args=False) def create_stack_user_project_id(self): project_id = self.clients.client( 'keystone').create_stack_domain_project(self.id) self.set_stack_user_project_id(project_id) + @profiler.trace('Stack.prepare_abandon', hide_args=False) def prepare_abandon(self): return { 'name': self.name, diff --git a/heat/tests/test_engine_service.py b/heat/tests/test_engine_service.py index 8c5683474..15155af30 100644 --- a/heat/tests/test_engine_service.py +++ b/heat/tests/test_engine_service.py @@ -353,6 +353,11 @@ class DummyThreadGroup(object): pass def add_thread(self, callback, *args, **kwargs): + # just to make _start_with_trace() easier to test: + # callback == _start_with_trace + # args[0] == trace_info + # args[1] == actual_callback + callback = args[1] self.threads.append(callback) return DummyThread() @@ -3459,8 +3464,9 @@ class ThreadGroupManagerTest(HeatTestCase): ret = thm.start(stack_id, self.f, *self.fargs, **self.fkwargs) self.assertEqual(self.tg_mock, thm.groups['test']) - self.tg_mock.add_thread.assert_called_with(self.f, *self.fargs, - **self.fkwargs) + self.tg_mock.add_thread.assert_called_with( + thm._start_with_trace, None, + self.f, *self.fargs, **self.fkwargs) self.assertEqual(self.tg_mock.add_thread(), ret) def test_tgm_add_timer(self): diff --git a/requirements.txt b/requirements.txt index a23d198d3..226e970a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,7 @@ oslo.i18n>=1.0.0 # Apache-2.0 oslo.messaging>=1.4.0 oslo.serialization>=1.0.0 # Apache-2.0 oslo.utils>=1.0.0 # Apache-2.0 +osprofiler>=0.3.0 PasteDeploy>=1.5.0 posix_ipc pycrypto>=2.6