Add extra logging to request filters

This adds a timing decorator to request_filter.py which logs the filters that we ran and how long they took. This requires a small calling convention change for these request filters to return True if they are enabled and/or did anything. This also sprinkles some debug logging into all the existing filters to provide contextual information about what they are doing to make debugging scheduling decisions easier. Change-Id: Ibf62c5398a95c75fde815309e7a4f82f1bd9649c
2019-05-09 09:24:41 -07:00 · 2019-05-09 09:24:41 -07:00 · b5142d807b
parent d4c8725940
commit b5142d807b
2 changed files with 89 additions and 11 deletions
--- a/nova/scheduler/request_filter.py
+++ b/nova/scheduler/request_filter.py
@ -10,8 +10,11 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.

+import functools
+
 import os_traits
 from oslo_log import log as logging
+from oslo_utils import timeutils

 import nova.conf
 from nova import exception
@ -24,6 +27,25 @@ LOG = logging.getLogger(__name__)
 TENANT_METADATA_KEY = 'filter_tenant_id'


+def trace_request_filter(fn):
+    @functools.wraps(fn)
+    def wrapper(ctxt, request_spec):
+        timer = timeutils.StopWatch()
+        ran = False
+        with timer:
+            try:
+                ran = fn(ctxt, request_spec)
+            finally:
+                if ran:
+                    # Only log info if the filter was enabled and not
+                    # excluded for some reason
+                    LOG.debug('Request filter %r took %.1f seconds',
+                        fn.__name__, timer.elapsed())
+        return ran
+    return wrapper
+
+
+@trace_request_filter
 def require_tenant_aggregate(ctxt, request_spec):
    """Require hosts in an aggregate based on tenant id.

@ -37,7 +59,7 @@ def require_tenant_aggregate(ctxt, request_spec):
    enabled = CONF.scheduler.limit_tenants_to_placement_aggregate
    agg_required = CONF.scheduler.placement_aggregate_required_for_tenants
    if not enabled:
-        return
+        return False

    aggregates = objects.AggregateList.get_by_metadata(
        ctxt, value=request_spec.project_id)
@ -54,13 +76,20 @@ def require_tenant_aggregate(ctxt, request_spec):
            request_spec.requested_destination = objects.Destination()
        destination = request_spec.requested_destination
        destination.require_aggregates(aggregate_uuids_for_tenant)
+        LOG.debug('require_tenant_aggregate request filter added '
+                  'aggregates %s for tenant %r',
+                  ','.join(aggregate_uuids_for_tenant),
+                  request_spec.project_id)
    elif agg_required:
        LOG.warning('Tenant %(tenant)s has no available aggregates',
                    {'tenant': request_spec.project_id})
        raise exception.RequestFilterFailed(
            reason=_('No hosts available for tenant'))

+    return True

+
+@trace_request_filter
 def map_az_to_placement_aggregate(ctxt, request_spec):
    """Map requested nova availability zones to placement aggregates.

@ -68,11 +97,11 @@ def map_az_to_placement_aggregate(ctxt, request_spec):
    matches the desired AZ of the user's request.
    """
    if not CONF.scheduler.query_placement_for_availability_zone:
-        return
+        return False

    az_hint = request_spec.availability_zone
    if not az_hint:
-        return
+        return False

    aggregates = objects.AggregateList.get_by_metadata(ctxt,
                                                       key='availability_zone',
@ -81,10 +110,17 @@ def map_az_to_placement_aggregate(ctxt, request_spec):
        if ('requested_destination' not in request_spec or
                request_spec.requested_destination is None):
            request_spec.requested_destination = objects.Destination()
-        request_spec.requested_destination.require_aggregates(
-            [agg.uuid for agg in aggregates])
+        agg_uuids = [agg.uuid for agg in aggregates]
+        request_spec.requested_destination.require_aggregates(agg_uuids)
+        LOG.debug('map_az_to_placement_aggregate request filter added '
+                  'aggregates %s for az %r',
+                  ','.join(agg_uuids),
+                  az_hint)
+
+    return True


+@trace_request_filter
 def require_image_type_support(ctxt, request_spec):
    """Request type-specific trait on candidates.

@ -92,19 +128,19 @@ def require_image_type_support(ctxt, request_spec):
    disk_format of the image provided.
    """
    if not CONF.scheduler.query_placement_for_image_type_support:
-        return
+        return False

    if request_spec.is_bfv:
        # We are booting from volume, and thus compute node image
        # disk_format support does not matter.
-        return
+        return False

    disk_format = request_spec.image.disk_format
    trait_name = 'COMPUTE_IMAGE_TYPE_%s' % disk_format.upper()
    if not hasattr(os_traits, trait_name):
        LOG.error(('Computed trait name %r is not valid; '
                   'is os-traits up to date?'), trait_name)
-        return
+        return False

    # NOTE(danms): We are using the transient flavor in the request spec
    # to add the trait that we need. We make sure that we reset the dirty-ness
@ -112,6 +148,11 @@ def require_image_type_support(ctxt, request_spec):
    request_spec.flavor.extra_specs['trait:%s' % trait_name] = 'required'
    request_spec.obj_reset_changes(fields=['flavor'], recursive=True)

+    LOG.debug('require_image_type_support request filter added required '
+              'trait %s', trait_name)
+
+    return True
+

 ALL_REQUEST_FILTERS = [
    require_tenant_aggregate,
--- a/nova/tests/unit/scheduler/test_request_filter.py
+++ b/nova/tests/unit/scheduler/test_request_filter.py
@ -12,6 +12,7 @@

 import mock
 from oslo_utils.fixture import uuidsentinel as uuids
+from oslo_utils import timeutils

 from nova import context as nova_context
 from nova import exception
@ -40,6 +41,28 @@ class TestRequestFilter(test.NoDBTestCase):
            filter.assert_called_once_with(mock.sentinel.context,
                                           mock.sentinel.reqspec)

+    @mock.patch.object(timeutils, 'now')
+    def test_log_timer(self, mock_now):
+        mock_now.return_value = 123
+        result = False
+
+        @request_filter.trace_request_filter
+        def tester(c, r):
+            mock_now.return_value += 2
+            return result
+
+        with mock.patch.object(request_filter, 'LOG') as log:
+            # With a False return, no log should be emitted
+            tester(None, None)
+            log.debug.assert_not_called()
+
+            # With a True return, elapsed time should be logged
+            result = True
+            tester(None, None)
+            log.debug.assert_called_once_with('Request filter %r'
+                                              ' took %.1f seconds',
+                                              'tester', 2.0)
+
    @mock.patch('nova.objects.AggregateList.get_by_metadata')
    def test_require_tenant_aggregate_disabled(self, getmd):
        self.flags(limit_tenants_to_placement_aggregate=False,
@ -49,7 +72,8 @@ class TestRequestFilter(test.NoDBTestCase):
        self.assertFalse(getmd.called)

    @mock.patch('nova.objects.AggregateList.get_by_metadata')
-    def test_require_tenant_aggregate(self, getmd):
+    @mock.patch.object(request_filter, 'LOG')
+    def test_require_tenant_aggregate(self, mock_log, getmd):
        getmd.return_value = [
            objects.Aggregate(
                uuid=uuids.agg1,
@ -71,6 +95,10 @@ class TestRequestFilter(test.NoDBTestCase):
        # necessarily just the one from context.
        getmd.assert_called_once_with(self.context, value='owner')

+        log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
+        self.assertIn('filter added aggregates', log_lines[0])
+        self.assertIn('took %.1f seconds', log_lines[1])
+
    @mock.patch('nova.objects.AggregateList.get_by_metadata')
    def test_require_tenant_aggregate_no_match(self, getmd):
        self.flags(placement_aggregate_required_for_tenants=True,
@ -87,12 +115,16 @@ class TestRequestFilter(test.NoDBTestCase):
            self.context, mock.MagicMock())

    @mock.patch('nova.objects.AggregateList.get_by_metadata')
-    def test_map_az(self, getmd):
+    @mock.patch.object(request_filter, 'LOG')
+    def test_map_az(self, mock_log, getmd):
        getmd.return_value = [objects.Aggregate(uuid=uuids.agg1)]
        reqspec = objects.RequestSpec(availability_zone='fooaz')
        request_filter.map_az_to_placement_aggregate(self.context, reqspec)
        self.assertEqual([uuids.agg1],
                         reqspec.requested_destination.aggregates)
+        log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
+        self.assertIn('filter added aggregates', log_lines[0])
+        self.assertIn('took %.1f seconds', log_lines[1])

    @mock.patch('nova.objects.AggregateList.get_by_metadata')
    def test_map_az_no_hint(self, getmd):
@ -182,7 +214,8 @@ class TestRequestFilter(test.NoDBTestCase):
        request_filter.require_image_type_support(self.context, reqspec)
        self.assertEqual({}, reqspec.flavor.extra_specs)

-    def test_require_image_type_support_adds_trait(self):
+    @mock.patch.object(request_filter, 'LOG')
+    def test_require_image_type_support_adds_trait(self, mock_log):
        self.flags(query_placement_for_image_type_support=True,
                   group='scheduler')
        reqspec = objects.RequestSpec(
@ -194,3 +227,7 @@ class TestRequestFilter(test.NoDBTestCase):
        self.assertEqual({'trait:COMPUTE_IMAGE_TYPE_RAW': 'required'},
                         reqspec.flavor.extra_specs)
        self.assertEqual(set(), reqspec.flavor.obj_what_changed())
+
+        log_lines = [c[0][0] for c in mock_log.debug.call_args_list]
+        self.assertIn('added required trait', log_lines[0])
+        self.assertIn('took %.1f seconds', log_lines[1])