Add extra logging to request filters

This adds a timing decorator to request_filter.py which logs the
filters that we ran and how long they took. This requires a small
calling convention change for these request filters to return
True if they are enabled and/or did anything.

This also sprinkles some debug logging into all the existing filters
to provide contextual information about what they are doing to
make debugging scheduling decisions easier.

Change-Id: Ibf62c5398a95c75fde815309e7a4f82f1bd9649c
This commit is contained in:
Dan Smith 2019-05-09 09:24:41 -07:00
parent d4c8725940
commit b5142d807b
2 changed files with 89 additions and 11 deletions

View File

@ -10,8 +10,11 @@
# License for the specific language governing permissions and limitations
# under the License.
import functools
import os_traits
from oslo_log import log as logging
from oslo_utils import timeutils
import nova.conf
from nova import exception
@ -24,6 +27,25 @@ LOG = logging.getLogger(__name__)
TENANT_METADATA_KEY = 'filter_tenant_id'
def trace_request_filter(fn):
@functools.wraps(fn)
def wrapper(ctxt, request_spec):
timer = timeutils.StopWatch()
ran = False
with timer:
try:
ran = fn(ctxt, request_spec)
finally:
if ran:
# Only log info if the filter was enabled and not
# excluded for some reason
LOG.debug('Request filter %r took %.1f seconds',
fn.__name__, timer.elapsed())
return ran
return wrapper
@trace_request_filter
def require_tenant_aggregate(ctxt, request_spec):
"""Require hosts in an aggregate based on tenant id.
@ -37,7 +59,7 @@ def require_tenant_aggregate(ctxt, request_spec):
enabled = CONF.scheduler.limit_tenants_to_placement_aggregate
agg_required = CONF.scheduler.placement_aggregate_required_for_tenants
if not enabled:
return
return False
aggregates = objects.AggregateList.get_by_metadata(
ctxt, value=request_spec.project_id)
@ -54,13 +76,20 @@ def require_tenant_aggregate(ctxt, request_spec):
request_spec.requested_destination = objects.Destination()
destination = request_spec.requested_destination
destination.require_aggregates(aggregate_uuids_for_tenant)
LOG.debug('require_tenant_aggregate request filter added '
'aggregates %s for tenant %r',
','.join(aggregate_uuids_for_tenant),
request_spec.project_id)
elif agg_required:
LOG.warning('Tenant %(tenant)s has no available aggregates',
{'tenant': request_spec.project_id})
raise exception.RequestFilterFailed(
reason=_('No hosts available for tenant'))
return True
@trace_request_filter
def map_az_to_placement_aggregate(ctxt, request_spec):
"""Map requested nova availability zones to placement aggregates.
@ -68,11 +97,11 @@ def map_az_to_placement_aggregate(ctxt, request_spec):
matches the desired AZ of the user's request.
"""
if not CONF.scheduler.query_placement_for_availability_zone:
return
return False
az_hint = request_spec.availability_zone
if not az_hint:
return
return False
aggregates = objects.AggregateList.get_by_metadata(ctxt,
key='availability_zone',
@ -81,10 +110,17 @@ def map_az_to_placement_aggregate(ctxt, request_spec):
if ('requested_destination' not in request_spec or
request_spec.requested_destination is None):
request_spec.requested_destination = objects.Destination()
request_spec.requested_destination.require_aggregates(
[agg.uuid for agg in aggregates])
agg_uuids = [agg.uuid for agg in aggregates]
request_spec.requested_destination.require_aggregates(agg_uuids)
LOG.debug('map_az_to_placement_aggregate request filter added '
'aggregates %s for az %r',
','.join(agg_uuids),
az_hint)
return True
@trace_request_filter
def require_image_type_support(ctxt, request_spec):
"""Request type-specific trait on candidates.
@ -92,19 +128,19 @@ def require_image_type_support(ctxt, request_spec):
disk_format of the image provided.
"""
if not CONF.scheduler.query_placement_for_image_type_support:
return
return False
if request_spec.is_bfv:
# We are booting from volume, and thus compute node image
# disk_format support does not matter.
return
return False
disk_format = request_spec.image.disk_format
trait_name = 'COMPUTE_IMAGE_TYPE_%s' % disk_format.upper()
if not hasattr(os_traits, trait_name):
LOG.error(('Computed trait name %r is not valid; '
'is os-traits up to date?'), trait_name)
return
return False
# NOTE(danms): We are using the transient flavor in the request spec
# to add the trait that we need. We make sure that we reset the dirty-ness
@ -112,6 +148,11 @@ def require_image_type_support(ctxt, request_spec):
request_spec.flavor.extra_specs['trait:%s' % trait_name] = 'required'
request_spec.obj_reset_changes(fields=['flavor'], recursive=True)
LOG.debug('require_image_type_support request filter added required '
'trait %s', trait_name)
return True
ALL_REQUEST_FILTERS = [
require_tenant_aggregate,

View File

@ -12,6 +12,7 @@
import mock
from oslo_utils.fixture import uuidsentinel as uuids
from oslo_utils import timeutils
from nova import context as nova_context
from nova import exception
@ -40,6 +41,28 @@ class TestRequestFilter(test.NoDBTestCase):
filter.assert_called_once_with(mock.sentinel.context,
mock.sentinel.reqspec)
@mock.patch.object(timeutils, 'now')
def test_log_timer(self, mock_now):
mock_now.return_value = 123
result = False
@request_filter.trace_request_filter
def tester(c, r):
mock_now.return_value += 2
return result
with mock.patch.object(request_filter, 'LOG') as log:
# With a False return, no log should be emitted
tester(None, None)
log.debug.assert_not_called()
# With a True return, elapsed time should be logged
result = True
tester(None, None)
log.debug.assert_called_once_with('Request filter %r'
' took %.1f seconds',
'tester', 2.0)
@mock.patch('nova.objects.AggregateList.get_by_metadata')
def test_require_tenant_aggregate_disabled(self, getmd):
self.flags(limit_tenants_to_placement_aggregate=False,
@ -49,7 +72,8 @@ class TestRequestFilter(test.NoDBTestCase):
self.assertFalse(getmd.called)
@mock.patch('nova.objects.AggregateList.get_by_metadata')
def test_require_tenant_aggregate(self, getmd):
@mock.patch.object(request_filter, 'LOG')
def test_require_tenant_aggregate(self, mock_log, getmd):
getmd.return_value = [
objects.Aggregate(
uuid=uuids.agg1,
@ -71,6 +95,10 @@ class TestRequestFilter(test.NoDBTestCase):
# necessarily just the one from context.
getmd.assert_called_once_with(self.context, value='owner')
log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
self.assertIn('filter added aggregates', log_lines[0])
self.assertIn('took %.1f seconds', log_lines[1])
@mock.patch('nova.objects.AggregateList.get_by_metadata')
def test_require_tenant_aggregate_no_match(self, getmd):
self.flags(placement_aggregate_required_for_tenants=True,
@ -87,12 +115,16 @@ class TestRequestFilter(test.NoDBTestCase):
self.context, mock.MagicMock())
@mock.patch('nova.objects.AggregateList.get_by_metadata')
def test_map_az(self, getmd):
@mock.patch.object(request_filter, 'LOG')
def test_map_az(self, mock_log, getmd):
getmd.return_value = [objects.Aggregate(uuid=uuids.agg1)]
reqspec = objects.RequestSpec(availability_zone='fooaz')
request_filter.map_az_to_placement_aggregate(self.context, reqspec)
self.assertEqual([uuids.agg1],
reqspec.requested_destination.aggregates)
log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
self.assertIn('filter added aggregates', log_lines[0])
self.assertIn('took %.1f seconds', log_lines[1])
@mock.patch('nova.objects.AggregateList.get_by_metadata')
def test_map_az_no_hint(self, getmd):
@ -182,7 +214,8 @@ class TestRequestFilter(test.NoDBTestCase):
request_filter.require_image_type_support(self.context, reqspec)
self.assertEqual({}, reqspec.flavor.extra_specs)
def test_require_image_type_support_adds_trait(self):
@mock.patch.object(request_filter, 'LOG')
def test_require_image_type_support_adds_trait(self, mock_log):
self.flags(query_placement_for_image_type_support=True,
group='scheduler')
reqspec = objects.RequestSpec(
@ -194,3 +227,7 @@ class TestRequestFilter(test.NoDBTestCase):
self.assertEqual({'trait:COMPUTE_IMAGE_TYPE_RAW': 'required'},
reqspec.flavor.extra_specs)
self.assertEqual(set(), reqspec.flavor.obj_what_changed())
log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
self.assertIn('added required trait', log_lines[0])
self.assertIn('took %.1f seconds', log_lines[1])