When getting an instance using the compute.API we call scatter_gather_single_cell() to be able to capture details when we fail to retrieve a result from a cell, such as timeouts and exceptions. Currently, however, we aren't logging the content of an exception if scatter_gather_single_cell() returns an exception as the result. The scatter gather method itself logs exceptions that are not of type NovaException, as these represent definite unexpected errors such as database errors, but the handling of NovaException is left for the caller to decide whether they want to log it or re-raise it and so on. It can be difficult to debug a situation where a cell is returning a NovaException result, so this adds logging of the exception content in the compute API when we encounter an unexpected NovaException. The existing log message has been updated to more accurately reflect what has happened (did not respond vs exception). The assignment of the exception object in scatter gather has also been updated to not unnecessarily construct a new exception object because it (a) wasn't necessary and (b) made asserting the LOG.exception() call argument in the unit test difficult. Related-Bug: #1970087 Change-Id: Iae1c61c72be5b6017b934293e3dc079a24eeb0e7
545 lines
21 KiB
545 lines
21 KiB
# Copyright 2011 OpenStack Foundation
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""RequestContext: context for requests that persist through all of nova."""
from contextlib import contextmanager
import copy
import eventlet.queue
import eventlet.timeout
from keystoneauth1.access import service_catalog as ksa_service_catalog
from keystoneauth1 import plugin
from oslo_context import context
from oslo_db.sqlalchemy import enginefacade
from oslo_log import log as logging
from oslo_utils import timeutils
from nova import exception
from nova.i18n import _
from nova import objects
from nova import policy
from nova import utils
LOG = logging.getLogger(__name__)
# Cache of per-cell connection state, keyed by cell mapping UUID. Each value
# is the (db_connection, mq_connection) tuple stored by set_target_cell().
# NOTE(review): this definition was missing from the reviewed text even
# though set_target_cell() reads and writes CELL_CACHE -- confirm it is not
# defined elsewhere.
CELL_CACHE = {}
# NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out
# waiting for a result from a cell.
did_not_respond_sentinel = object()
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []
# Timeout value (in seconds) for waiting for cells to respond.
# NOTE(review): the assignment under this comment was missing from the
# reviewed text although the scatter_gather_* helpers use CELL_TIMEOUT; the
# value 60 is assumed -- confirm.
CELL_TIMEOUT = 60
class _ContextAuthPlugin(plugin.BaseAuthPlugin):
    """A keystoneauth auth plugin that uses the values from the Context.

    Ideally we would use the plugin provided by auth_token middleware however
    this plugin isn't serialized yet so we construct one from the serialized
    auth data.
    """

    def __init__(self, auth_token, sc):
        """Store the token and wrap the serialized service catalog.

        :param auth_token: The token handed back verbatim by get_token().
        :param sc: The serialized service catalog; it is wrapped in a
            keystoneauth ServiceCatalogV2 so endpoints can be looked up.
        """
        super(_ContextAuthPlugin, self).__init__()

        self.auth_token = auth_token
        self.service_catalog = ksa_service_catalog.ServiceCatalogV2(sc)

    def get_token(self, *args, **kwargs):
        """Return the stored auth token; all arguments are ignored."""
        return self.auth_token

    def get_endpoint(self, session, service_type=None, interface=None,
                     region_name=None, service_name=None, **kwargs):
        """Look up an endpoint URL in the stored service catalog.

        :param session: Unused; accepted for plugin interface compatibility.
        :param service_type: Service type to look up.
        :param interface: Endpoint interface to look up.
        :param region_name: Region to restrict the lookup to.
        :param service_name: Service name to restrict the lookup to.
        :returns: Whatever ServiceCatalogV2.url_for() returns for the filters.
        """
        # NOTE(review): this call was cut off after service_type in the
        # reviewed text; the remaining filters are restored from this
        # method's own parameters -- confirm.
        return self.service_catalog.url_for(service_type=service_type,
                                            service_name=service_name,
                                            interface=interface,
                                            region_name=region_name)
# NOTE(review): the decorator below was not present in the reviewed text. It
# is restored because enginefacade is imported by this module yet otherwise
# unused, and target_cell() talks about an "oslo_db-set transaction context"
# living on the context -- confirm.
@enginefacade.transaction_context_provider
class RequestContext(context.RequestContext):
    """Security context and request information.

    Represents the user taking a given action within the system.

    """

    def __init__(self, user_id=None, project_id=None, is_admin=None,
                 read_deleted="no", remote_address=None, timestamp=None,
                 quota_class=None, service_catalog=None,
                 user_auth_plugin=None, **kwargs):
        """Initialize the context.

        :param user_id: Forwarded to the base class only when truthy.
        :param project_id: Forwarded to the base class only when truthy.
        :param is_admin: If None, it is computed with
            policy.check_is_admin() once the context is set up.
        :param read_deleted: 'no' indicates deleted records are hidden,
            'yes' indicates deleted records are visible,
            'only' indicates that *only* deleted records are visible.
        :param remote_address: Stored as-is on the context.
        :param timestamp: A datetime, or a string that is parsed with
            timeutils.parse_strtime(); defaults to timeutils.utcnow().
        :param quota_class: Stored as-is on the context.
        :param service_catalog: Iterable of catalog entries (dicts with a
            'type' key); only entries of the types listed below are kept.
        :param overwrite: Set to False to ensure that the greenthread local
            copy of the index is not overwritten.
        :param user_auth_plugin: The auth plugin for the current request's
            authentication data.
        """
        if user_id:
            kwargs['user_id'] = user_id
        if project_id:
            kwargs['project_id'] = project_id

        super(RequestContext, self).__init__(is_admin=is_admin, **kwargs)

        self.read_deleted = read_deleted
        self.remote_address = remote_address
        if not timestamp:
            timestamp = timeutils.utcnow()
        if isinstance(timestamp, str):
            timestamp = timeutils.parse_strtime(timestamp)
        self.timestamp = timestamp

        if service_catalog:
            # Only include required parts of service_catalog
            # NOTE(review): this tuple was cut off after 'network' in the
            # reviewed text; further service types may have been lost --
            # confirm.
            self.service_catalog = [s for s in service_catalog
                if s.get('type') in ('image', 'block-storage', 'volumev3',
                                     'key-manager', 'placement', 'network')]
        else:
            # if list is empty or none
            self.service_catalog = []

        # NOTE(markmc): this attribute is currently only used by the
        # rs_limits turnstile pre-processor.
        # See https://lists.launchpad.net/openstack/msg12200.html
        self.quota_class = quota_class

        # NOTE(dheeraj): The following attributes are used by cellsv2 to store
        # connection information for connecting to the target cell.
        # It is only manipulated using the target_cell contextmanager
        # provided by this module
        self.db_connection = None
        self.mq_connection = None
        self.cell_uuid = None

        self.user_auth_plugin = user_auth_plugin
        if self.is_admin is None:
            self.is_admin = policy.check_is_admin(self)

    def get_auth_plugin(self):
        """Return the request's auth plugin, or one built from this context.

        :returns: self.user_auth_plugin when set, otherwise a
            _ContextAuthPlugin built from the auth token and service catalog.
        """
        if self.user_auth_plugin:
            return self.user_auth_plugin
        return _ContextAuthPlugin(self.auth_token, self.service_catalog)

    def _get_read_deleted(self):
        return self._read_deleted

    def _set_read_deleted(self, read_deleted):
        # Validate on every assignment, including the one in __init__.
        if read_deleted not in ('no', 'yes', 'only'):
            raise ValueError(_("read_deleted can only be one of 'no', "
                               "'yes' or 'only', not %r") % read_deleted)
        self._read_deleted = read_deleted

    def _del_read_deleted(self):
        del self._read_deleted

    read_deleted = property(_get_read_deleted, _set_read_deleted,
                            _del_read_deleted)

    def to_dict(self):
        """Serialize the context, adding nova fields to the base dict.

        :returns: The base class dict updated with the keys set below.
        """
        values = super(RequestContext, self).to_dict()
        # FIXME(dims): defensive hasattr() checks need to be
        # removed once we figure out why we are seeing stack
        # traces
        values.update({
            'user_id': getattr(self, 'user_id', None),
            'project_id': getattr(self, 'project_id', None),
            'is_admin': getattr(self, 'is_admin', None),
            'read_deleted': getattr(self, 'read_deleted', 'no'),
            'remote_address': getattr(self, 'remote_address', None),
            'timestamp': utils.strtime(self.timestamp) if hasattr(
                self, 'timestamp') else None,
            'request_id': getattr(self, 'request_id', None),
            'quota_class': getattr(self, 'quota_class', None),
            'user_name': getattr(self, 'user_name', None),
            'service_catalog': getattr(self, 'service_catalog', None),
            'project_name': getattr(self, 'project_name', None),
            # NOTE(tonyb): This can be removed once we're certain to have a
            # RequestContext contains 'is_admin_project', We can only get
            # away with this because we "know" the default value of
            # 'is_admin_project' which is very fragile.
            'is_admin_project': getattr(self, 'is_admin_project', True),
        })
        return values

    @classmethod
    def from_dict(cls, values):
        """Build a context from a dict such as the one to_dict() produces.

        :param values: Mapping of serialized context fields.
        :returns: Whatever the base class from_dict() builds for this class.
        """
        # NOTE(review): only the read_deleted argument survived in the
        # reviewed text. ``values`` and the other keyword arguments are
        # restored from the __init__ parameters that to_dict() serializes --
        # confirm.
        return super(RequestContext, cls).from_dict(
            values,
            user_id=values.get('user_id'),
            project_id=values.get('project_id'),
            # TODO(sdague): oslo.context has show_deleted, if
            # possible, we should migrate to that in the future so we
            # don't need to be different here.
            read_deleted=values.get('read_deleted', 'no'),
            remote_address=values.get('remote_address'),
            timestamp=values.get('timestamp'),
            quota_class=values.get('quota_class'),
            service_catalog=values.get('service_catalog'),
        )

    def elevated(self, read_deleted=None):
        """Return a version of this context with admin flag set.

        :param read_deleted: When not None, overrides read_deleted on the
            returned copy.
        :returns: A shallow copy of this context with is_admin set to True
            and 'admin' present in its roles.
        """
        context = copy.copy(self)
        # context.roles must be deepcopied to leave original roles
        # without changes
        context.roles = copy.deepcopy(self.roles)
        context.is_admin = True

        if 'admin' not in context.roles:
            context.roles.append('admin')

        if read_deleted is not None:
            context.read_deleted = read_deleted

        return context

    def can(self, action, target=None, fatal=True):
        """Verifies that the given action is valid on the target in this
        context.

        :param action: string representing the action to be checked.
        :param target: dictionary representing the object of the action
            for object creation this should be a dictionary representing the
            location of the object
            e.g. ``{'project_id': instance.project_id}``.
        :param fatal: if False, will return False when an exception.Forbidden
            occurs.

        :raises nova.exception.Forbidden: if verification fails and fatal is
            True.

        :return: returns a non-False value (not necessarily "True") if
            authorized and False if not authorized and fatal is False.
        """
        try:
            return policy.authorize(self, action, target)
        except exception.Forbidden:
            if fatal:
                raise
            return False

    def to_policy_values(self):
        """Return the base policy values with 'is_admin' added."""
        # Named ``values`` so the nova ``policy`` module is not shadowed.
        values = super(RequestContext, self).to_policy_values()
        values['is_admin'] = self.is_admin
        return values

    def __str__(self):
        return "<Context %s>" % self.to_dict()
def get_context():
    """A helper method to get a blank context.

    Note that overwrite is False here so this context will not update the
    greenthread-local stored context that is used when logging.

    :returns: A new RequestContext with no user or project set.
    """
    # NOTE(review): the argument list was cut off after user_id in the
    # reviewed text. overwrite=False follows from the docstring above;
    # project_id=None and is_admin=False are assumed for a "blank" context --
    # confirm.
    return RequestContext(user_id=None,
                          project_id=None,
                          is_admin=False,
                          overwrite=False)
def get_admin_context(read_deleted="no"):
    """Return a new RequestContext with the admin flag set.

    :param read_deleted: Passed to RequestContext; one of 'no', 'yes' or
        'only'.
    :returns: A new admin RequestContext with no user or project set.
    """
    # NOTE(alaski): This method should only be used when an admin context is
    # necessary for the entirety of the context lifetime. If that's not the
    # case please use get_context(), or create the RequestContext manually, and
    # use context.elevated() where necessary. Some periodic tasks may use
    # get_admin_context so that their database calls are not filtered on
    # project_id.
    # NOTE(review): the argument list was cut off after user_id in the
    # reviewed text. is_admin=True and read_deleted follow from this
    # function's name and parameter; project_id=None and overwrite=False are
    # assumed -- confirm.
    return RequestContext(user_id=None,
                          project_id=None,
                          is_admin=True,
                          read_deleted=read_deleted,
                          overwrite=False)
def is_user_context(context):
    """Indicates if the request context is a normal user.

    :param context: The context to inspect; may be None or otherwise falsy.
    :returns: True only when the context is truthy, is not an admin context
        and has both a user_id and a project_id; False otherwise.
    """
    if not context:
        return False
    if context.is_admin:
        return False
    if not context.user_id or not context.project_id:
        return False
    return True
def require_context(ctxt):
    """Raise exception.Forbidden() if context is not a user or an
    admin context.

    :param ctxt: The request context to check.
    :raises nova.exception.Forbidden: if ctxt is neither an admin context
        nor a user context as defined by is_user_context().
    """
    if not ctxt.is_admin and not is_user_context(ctxt):
        raise exception.Forbidden()
def authorize_project_context(context, project_id):
    """Ensures a request has permission to access the given project.

    Only user contexts (see is_user_context()) are checked; any other
    context passes without a check.

    :param context: The request context to check.
    :param project_id: The project the request wants to access.
    :raises nova.exception.Forbidden: if a user context has no project_id or
        its project_id differs from the requested one.
    """
    if is_user_context(context):
        if not context.project_id:
            raise exception.Forbidden()
        elif context.project_id != project_id:
            raise exception.Forbidden()
def authorize_user_context(context, user_id):
    """Ensures a request has permission to access the given user.

    Only user contexts (see is_user_context()) are checked; any other
    context passes without a check.

    :param context: The request context to check.
    :param user_id: The user the request wants to access.
    :raises nova.exception.Forbidden: if a user context has no user_id or
        its user_id differs from the requested one.
    """
    if is_user_context(context):
        if not context.user_id:
            raise exception.Forbidden()
        elif context.user_id != user_id:
            raise exception.Forbidden()
def authorize_quota_class_context(context, class_name):
    """Ensures a request has permission to access the given quota class.

    Only user contexts (see is_user_context()) are checked; any other
    context passes without a check.

    :param context: The request context to check.
    :param class_name: The quota class the request wants to access.
    :raises nova.exception.Forbidden: if a user context has no quota_class
        or its quota_class differs from the requested one.
    """
    if is_user_context(context):
        if not context.quota_class:
            raise exception.Forbidden()
        elif context.quota_class != class_name:
            raise exception.Forbidden()
def set_target_cell(context, cell_mapping):
    """Adds database connection information to the context
    for communicating with the given target_cell.

    This is used for permanently targeting a cell in a context.
    Use this when you want all subsequent code to target a cell.

    Passing None for cell_mapping will untarget the context.

    :param context: The RequestContext to add connection information
    :param cell_mapping: An objects.CellMapping object or None
    """
    if cell_mapping is not None:
        # avoid circular import
        from nova.db.main import api as db
        from nova import rpc

        # Synchronize access to the cache by multiple API workers.
        # NOTE(review): the decorator was missing from the reviewed text; it
        # is restored from the comment above, assuming nova.utils exposes
        # ``synchronized`` -- confirm.
        @utils.synchronized(cell_mapping.uuid)
        def get_or_set_cached_cell_and_set_connections():
            try:
                cell_tuple = CELL_CACHE[cell_mapping.uuid]
            except KeyError:
                # First time this cell is targeted: build its connections
                # and remember them for later callers.
                db_connection_string = cell_mapping.database_connection
                context.db_connection = db.create_context_manager(
                    db_connection_string)
                # No MQ transport is created when the transport URL starts
                # with 'none'.
                if not cell_mapping.transport_url.startswith('none'):
                    context.mq_connection = rpc.create_transport(
                        cell_mapping.transport_url)
                context.cell_uuid = cell_mapping.uuid
                CELL_CACHE[cell_mapping.uuid] = (context.db_connection,
                                                 context.mq_connection)
            else:
                context.db_connection = cell_tuple[0]
                context.mq_connection = cell_tuple[1]
                context.cell_uuid = cell_mapping.uuid

        get_or_set_cached_cell_and_set_connections()
    else:
        context.db_connection = None
        context.mq_connection = None
        context.cell_uuid = None
@contextmanager
def target_cell(context, cell_mapping):
    """Yields a new context with connection information for a specific cell.

    This function yields a copy of the provided context, which is targeted to
    the referenced cell for MQ and DB connections.

    Passing None for cell_mapping will yield an untargeted copy of the
    context.

    :param context: The RequestContext to add connection information
    :param cell_mapping: An objects.CellMapping object or None
    """
    # Create a sanitized copy of context by serializing and deserializing it
    # (like we would do over RPC). This help ensure that we have a clean
    # copy of the context with all the tracked attributes, but without any
    # of the hidden/private things we cache on a context. We do this to avoid
    # unintentional sharing of cached thread-local data across threads.
    # Specifically, this won't include any oslo_db-set transaction context, or
    # any existing cell targeting.
    cctxt = RequestContext.from_dict(context.to_dict())
    set_target_cell(cctxt, cell_mapping)
    yield cctxt
def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
    """Target cells in parallel and return their results.

    The first parameter in the signature of the function to call for each cell
    should be of type RequestContext.

    :param context: The RequestContext for querying cells
    :param cell_mappings: The CellMappings to target in parallel
    :param timeout: The total time in seconds to wait for all the results to be
                    gathered
    :param fn: The function to call for each cell
    :param args: The args for the function to call for each cell, not including
                 the RequestContext
    :param kwargs: The kwargs for the function to call for each cell
    :returns: A dict {cell_uuid: result} containing the joined results. The
              did_not_respond_sentinel will be returned if a cell did not
              respond within the timeout. The exception object will
              be returned if the call to a cell raised an exception. Only
              exceptions that are not NovaException are logged here;
              handling of NovaException results is left to the caller.
    """
    greenthreads = []
    queue = eventlet.queue.LightQueue()
    results = {}

    def gather_result(cell_uuid, fn, *args, **kwargs):
        try:
            result = fn(*args, **kwargs)
        except Exception as e:
            # Only log the exception traceback for non-nova exceptions.
            if not isinstance(e, exception.NovaException):
                LOG.exception('Error gathering result from cell %s',
                              cell_uuid)
            result = e
        # The queue is already synchronized.
        queue.put((cell_uuid, result))

    for cell_mapping in cell_mappings:
        with target_cell(context, cell_mapping) as cctxt:
            greenthreads.append((cell_mapping.uuid,
                                 utils.spawn(gather_result, cell_mapping.uuid,
                                             fn, cctxt, *args, **kwargs)))

    with eventlet.timeout.Timeout(timeout, exception.CellTimeout):
        try:
            while len(results) != len(greenthreads):
                cell_uuid, result = queue.get()
                results[cell_uuid] = result
        except exception.CellTimeout:
            # NOTE(melwitt): We'll fill in did_not_respond_sentinels at the
            # same time we kill/wait for the green threads.
            pass

    # Kill the green threads still pending and wait on those we know are done.
    # NOTE(review): the kill()/wait() calls were missing from the reviewed
    # text and are restored from the comment above -- confirm.
    for cell_uuid, greenthread in greenthreads:
        if cell_uuid not in results:
            greenthread.kill()
            results[cell_uuid] = did_not_respond_sentinel
            LOG.warning('Timed out waiting for response from cell %s',
                        cell_uuid)
        else:
            greenthread.wait()

    return results
def load_cells():
    """Populate the module-level CELLS cache if it is still empty.

    Does nothing when CELLS already has entries. Logs an error when no cell
    mappings are found.
    """
    global CELLS
    if not CELLS:
        CELLS = objects.CellMappingList.get_all(get_admin_context())
        LOG.debug('Found %(count)i cells: %(cells)s',
                  dict(count=len(CELLS),
                       cells=','.join([c.identity for c in CELLS])))

    if not CELLS:
        LOG.error('No cells are configured, unable to continue')
def is_cell_failure_sentinel(record):
    """Tell whether a scatter-gather result represents a failed cell.

    :param record: A single result value from a scatter-gather call.
    :returns: True if record is the did_not_respond_sentinel or is an
        Exception instance, False otherwise.
    """
    return (record is did_not_respond_sentinel or
            isinstance(record, Exception))
def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
    """Target all cells except cell0 in parallel and return their results.

    The first parameter in the signature of the function to call for
    each cell should be of type RequestContext. There is a timeout for
    waiting on all results to be gathered.

    :param context: The RequestContext for querying cells
    :param fn: The function to call for each cell
    :param args: The args for the function to call for each cell, not including
                 the RequestContext
    :param kwargs: The kwargs for the function to call for each cell
    :returns: A dict {cell_uuid: result} containing the joined results. The
              did_not_respond_sentinel will be returned if a cell did not
              respond within the timeout. The exception object will
              be returned if the call to a cell raised an exception.
    """
    # CELLS starts out empty and is only filled by load_cells(), so make sure
    # it has been populated before filtering it.
    load_cells()
    cell_mappings = [cell for cell in CELLS if not cell.is_cell0()]
    return scatter_gather_cells(context, cell_mappings, CELL_TIMEOUT,
                                fn, *args, **kwargs)
def scatter_gather_single_cell(context, cell_mapping, fn, *args, **kwargs):
    """Target the provided cell and return its results or sentinels in case of
    failure.

    The first parameter in the signature of the function to call for each cell
    should be of type RequestContext.

    :param context: The RequestContext for querying cells
    :param cell_mapping: The CellMapping to target
    :param fn: The function to call for each cell
    :param args: The args for the function to call for each cell, not including
                 the RequestContext
    :param kwargs: The kwargs for the function to call for this cell
    :returns: A dict {cell_uuid: result} containing the joined results. The
              did_not_respond_sentinel will be returned if the cell did not
              respond within the timeout. The exception object will
              be returned if the call to the cell raised an exception.
    """
    return scatter_gather_cells(context, [cell_mapping], CELL_TIMEOUT, fn,
                                *args, **kwargs)
def scatter_gather_all_cells(context, fn, *args, **kwargs):
    """Target all cells in parallel and return their results.

    The first parameter in the signature of the function to call for
    each cell should be of type RequestContext. There is a timeout for
    waiting on all results to be gathered.

    :param context: The RequestContext for querying cells
    :param fn: The function to call for each cell
    :param args: The args for the function to call for each cell, not including
                 the RequestContext
    :param kwargs: The kwargs for the function to call for each cell
    :returns: A dict {cell_uuid: result} containing the joined results. The
              did_not_respond_sentinel will be returned if a cell did not
              respond within the timeout. The exception object will
              be returned if the call to a cell raised an exception.
    """
    # CELLS starts out empty and is only filled by load_cells(), so make sure
    # it has been populated before fanning out.
    load_cells()
    return scatter_gather_cells(context, CELLS, CELL_TIMEOUT,
                                fn, *args, **kwargs)