Pass limit to /allocation_candidates

This makes the scheduler pass an upper limit to placement when doing scheduling activities. Without this, we'll always receive every host in the deployment (that has space for the request), which may be a very large number of hosts.

Closes-Bug: #1746294
Change-Id: I1c34964a74123b3d94ccae89d7cac0426b57b9b6
2018-01-05 14:47:50 -08:00 · 2018-01-05 14:47:50 -08:00 · f029784eaf
commit f029784eaf
parent 02b505806a
4 changed files with 38 additions and 3 deletions
--- a/nova/conf/scheduler.py
+++ b/nova/conf/scheduler.py
@@ -125,6 +125,25 @@ enabled, where others may prefer to manually discover hosts when one
 is added to avoid any overhead from constantly checking. If enabled,
 every time this runs, we will select any unmapped hosts out of each
 cell database on every run.
+"""),
+    cfg.IntOpt("max_placement_results",
+               default=1000,
+               min=1,
+               help="""
+This setting determines the maximum limit on results received from the
+placement service during a scheduling operation. It effectively limits
+the number of hosts that may be considered for scheduling requests that
+match a large number of candidates.
+
+A value of 1 (the minimum) will effectively defer scheduling to the placement
+service strictly on "will it fit" grounds. A higher value will put an upper
+cap on the number of results the scheduler will consider during the filtering
+and weighing process. Large deployments may need to set this lower than the
+total number of hosts available to limit memory consumption, network traffic,
+etc. of the scheduler.
+
+This option is only used by the FilterScheduler; if you use a different
+scheduler, this option has no effect.
 """),
 ]

--- a/nova/scheduler/client/report.py
+++ b/nova/scheduler/client/report.py
@@ -335,6 +335,7 @@ class SchedulerReportClient(object):
            for (rc, amount) in res.items()))
        qs_params = {
            'resources': resource_query,
+            'limit': CONF.scheduler.max_placement_results,
        }
        if required_traits:
            qs_params['required'] = ",".join(required_traits)
--- a/nova/tests/unit/scheduler/client/test_report.py
+++ b/nova/tests/unit/scheduler/client/test_report.py
@@ -1506,7 +1506,8 @@ class TestProviderOperations(SchedulerReportClientTestCase):

        expected_url = '/allocation_candidates?%s' % parse.urlencode(
            {'resources': 'MEMORY_MB:1024,VCPU:1',
-             'required': 'CUSTOM_TRAIT1'})
+             'required': 'CUSTOM_TRAIT1',
+             'limit': 1000})
        self.ks_adap_mock.get.assert_called_once_with(
            expected_url, raise_exc=False, microversion='1.17')
        self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs)
@@ -1531,7 +1532,8 @@ class TestProviderOperations(SchedulerReportClientTestCase):
                self.client.get_allocation_candidates(resources)

        expected_url = '/allocation_candidates?%s' % parse.urlencode(
-            {'resources': 'MEMORY_MB:1024,VCPU:1'})
+            {'resources': 'MEMORY_MB:1024,VCPU:1',
+             'limit': 1000})
        self.ks_adap_mock.get.assert_called_once_with(
            expected_url, raise_exc=False, microversion='1.17')
        self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs)
@@ -1542,12 +1544,17 @@ class TestProviderOperations(SchedulerReportClientTestCase):
        resp_mock = mock.Mock(status_code=404)
        self.ks_adap_mock.get.return_value = resp_mock

+        # Make sure we're also honoring the configured limit
+        self.flags(max_placement_results=100, group='scheduler')
+
        resources = scheduler_utils.ResourceRequest.from_extra_specs(
            {'resources:MEMORY_MB': '1024'})

        res = self.client.get_allocation_candidates(resources)

-        expected_url = '/allocation_candidates?resources=MEMORY_MB%3A1024'
+        expected_url = ('/allocation_candidates?%s' % parse.urlencode(
+            {'resources': 'MEMORY_MB:1024',
+             'limit': '100'}))
        self.ks_adap_mock.get.assert_called_once_with(
            expected_url, raise_exc=False, microversion='1.17')
        self.assertIsNone(res[0])
--- a/releasenotes/notes/scheduler-limit-placement-650fc06be2a08781.yaml
+++ b/releasenotes/notes/scheduler-limit-placement-650fc06be2a08781.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+  - |
+    The FilterScheduler now limits the number of results in the query it makes
+    to placement to avoid situations where every compute node in a large
+    deployment is returned. This is configurable with the new
+    ``[scheduler]/max_placement_results`` configuration option, which defaults
+    to 1000, a sane starting value for any size deployment.