Fix bugs in resource tracker and cleanup
Fixes bugs in resource tracker:

 * Handle disk oversubscription
 * Handle suspended/powered off instances

The usage model is changed to the old style, which is based on actual
instance usage on a compute host (not on the hypervisor's host stats
as reported at the current point in time).

There is now a 'limits' filter property that can be passed from the
scheduler to the compute node to indicate that oversubscription of
resources is desired.  The 'limits' filter property is a dict with the
following possible keys:

 * memory_mb - Specifies the memory ceiling for the compute node.
 * disk_gb - Specifies the disk space ceiling for the compute node.
 * vcpu - Specifies the max number of vcpus for the compute node.

There is also some general cleanup and additional unit tests in an
attempt to simplify this function.

bug 1048842
bug 1052157

Change-Id: I6ee851b8c03234a78a64d9f5c494dfc7059cdda4
parent be99338950
commit a187d8885e
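The 'limits' handoff described above can be pictured with a short standalone Python sketch (illustration only, not code from this commit; the compute-side check is hypothetical, since the compute changes are not part of this diff view):

    # Filters record ceilings on host_state.limits, the scheduler copies
    # them into filter_properties, and the compute node tests its claim
    # against them.  Names ending in Sketch are illustrative, not nova API.
    class HostStateSketch(object):
        def __init__(self):
            self.limits = {}  # e.g. {'memory_mb': 4096.0, 'disk_gb': 120.0}

    def add_oversubscription_policy(filter_properties, host_state):
        # Mirrors FilterScheduler._add_oversubscription_policy below.
        filter_properties['limits'] = host_state.limits

    def claim_fits(limits, key, used, requested):
        # Hypothetical compute-side test: a missing ceiling means no cap.
        ceiling = limits.get(key)
        return ceiling is None or used + requested <= ceiling

    host_state = HostStateSketch()
    host_state.limits['memory_mb'] = 2048 * 2.0  # as RamFilter would set it
    filter_properties = {}
    add_oversubscription_policy(filter_properties, host_state)
    print(claim_fits(filter_properties['limits'], 'memory_mb', 3072, 1024))
    # True: 3072 MB used + 1024 MB requested <= 4096.0 MB ceiling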
nova/scheduler/filter_scheduler.py
@@ -132,6 +132,9 @@ class FilterScheduler(driver.Scheduler):
         # Add a retry entry for the selected compute host:
         self._add_retry_host(filter_properties, weighted_host.host_state.host)
 
+        self._add_oversubscription_policy(filter_properties,
+                weighted_host.host_state)
+
         payload = dict(request_spec=request_spec,
                        weighted_host=weighted_host.to_dict(),
                        instance_id=instance_uuid)
@@ -160,6 +163,9 @@ class FilterScheduler(driver.Scheduler):
         hosts = retry['hosts']
         hosts.append(host)
 
+    def _add_oversubscription_policy(self, filter_properties, host_state):
+        filter_properties['limits'] = host_state.limits
+
     def _get_configuration_options(self):
         """Fetch options dictionary. Broken out for testing."""
         return self.options.get_configuration()
nova/scheduler/filters/core_filter.py
@@ -47,4 +47,10 @@ class CoreFilter(filters.BaseHostFilter):
 
         instance_vcpus = instance_type['vcpus']
         vcpus_total = host_state.vcpus_total * FLAGS.cpu_allocation_ratio
+
+        # Only provide a VCPU limit to compute if the virt driver is reporting
+        # an accurate count of installed VCPUs. (XenServer driver does not)
+        if vcpus_total > 0:
+            host_state.limits['vcpu'] = vcpus_total
+
         return (vcpus_total - host_state.vcpus_used) >= instance_vcpus
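Worked example of the CoreFilter change above, as a standalone sketch (the numbers are made up; nova's cpu_allocation_ratio default of 16.0 is assumed):

    cpu_allocation_ratio = 16.0
    host_vcpus_total = 4      # physical vcpus reported by the virt driver
    host_vcpus_used = 8       # vcpus already allocated to instances
    instance_vcpus = 4

    vcpus_total = host_vcpus_total * cpu_allocation_ratio  # 64.0
    limits = {}
    if vcpus_total > 0:       # 0 means no reliable count (e.g. XenServer)
        limits['vcpu'] = vcpus_total
    print((vcpus_total - host_vcpus_used) >= instance_vcpus)  # True
    print(limits)  # {'vcpu': 64.0}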
nova/scheduler/filters/disk_filter.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+# Copyright (c) 2012 OpenStack, LLC.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import flags
+from nova.openstack.common import cfg
+from nova.openstack.common import log as logging
+from nova.scheduler import filters
+
+LOG = logging.getLogger(__name__)
+
+disk_allocation_ratio_opt = cfg.FloatOpt("disk_allocation_ratio", default=1.0,
+        help="virtual disk to physical disk allocation ratio")
+
+FLAGS = flags.FLAGS
+FLAGS.register_opt(disk_allocation_ratio_opt)
+
+
+class DiskFilter(filters.BaseHostFilter):
+    """Disk Filter with over subscription flag"""
+
+    def host_passes(self, host_state, filter_properties):
+        """Filter based on disk usage"""
+        instance_type = filter_properties.get('instance_type')
+        requested_disk = 1024 * (instance_type['root_gb'] +
+                                 instance_type['ephemeral_gb'])
+
+        free_disk_mb = host_state.free_disk_mb
+        total_usable_disk_mb = host_state.total_usable_disk_gb * 1024
+
+        disk_mb_limit = total_usable_disk_mb * FLAGS.disk_allocation_ratio
+        used_disk_mb = total_usable_disk_mb - free_disk_mb
+        usable_disk_mb = disk_mb_limit - used_disk_mb
+
+        if not usable_disk_mb >= requested_disk:
+            LOG.debug(_("%(host_state)s does not have %(requested_disk)s MB "
+                    "usable disk, it only has %(usable_disk_mb)s MB usable "
+                    "disk."), locals())
+            return False
+
+        disk_gb_limit = disk_mb_limit / 1024
+        host_state.limits['disk_gb'] = disk_gb_limit
+        return True
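The new filter's arithmetic, replayed standalone with the numbers from test_disk_filter_oversubscribe further down (illustration only):

    disk_allocation_ratio = 10.0
    total_usable_disk_gb = 12
    free_disk_mb = 11 * 1024             # 1 GB already used
    requested_disk = 1024 * (100 + 19)   # root_gb + ephemeral_gb = 119 GB

    total_usable_disk_mb = total_usable_disk_gb * 1024            # 12288
    disk_mb_limit = total_usable_disk_mb * disk_allocation_ratio  # 122880.0
    used_disk_mb = total_usable_disk_mb - free_disk_mb            # 1024
    usable_disk_mb = disk_mb_limit - used_disk_mb                 # 121856.0

    print(usable_disk_mb >= requested_disk)  # True: 121856.0 >= 121856
    print(disk_mb_limit / 1024)  # 120.0, recorded as limits['disk_gb']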
nova/scheduler/filters/ram_filter.py
@@ -39,17 +39,15 @@ class RamFilter(filters.BaseHostFilter):
         free_ram_mb = host_state.free_ram_mb
         total_usable_ram_mb = host_state.total_usable_ram_mb
 
-        oversubscribed_ram_limit_mb = (total_usable_ram_mb *
-                                       FLAGS.ram_allocation_ratio)
+        memory_mb_limit = total_usable_ram_mb * FLAGS.ram_allocation_ratio
         used_ram_mb = total_usable_ram_mb - free_ram_mb
-        usable_ram = oversubscribed_ram_limit_mb - used_ram_mb
+        usable_ram = memory_mb_limit - used_ram_mb
         if not usable_ram >= requested_ram:
             LOG.debug(_("%(host_state)s does not have %(requested_ram)s MB "
                     "usable ram, it only has %(usable_ram)s MB usable ram."),
                     locals())
             return False
 
-        # save oversubscribe ram limit so the compute host can verify
-        # memory availability on builds:
-        filter_properties['memory_mb_limit'] = oversubscribed_ram_limit_mb
+        # save oversubscription limit for compute node to test against:
+        host_state.limits['memory_mb'] = memory_mb_limit
         return True
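The same arithmetic for RAM, replayed with the numbers from test_ram_filter_oversubscribe below (standalone illustration; note that free_ram_mb is legitimately negative once a host is oversubscribed):

    ram_allocation_ratio = 2.0
    total_usable_ram_mb = 2048
    free_ram_mb = -1024       # 3072 MB already consumed on a 2048 MB host
    requested_ram = 1024

    memory_mb_limit = total_usable_ram_mb * ram_allocation_ratio  # 4096.0
    used_ram_mb = total_usable_ram_mb - free_ram_mb               # 3072
    usable_ram = memory_mb_limit - used_ram_mb                    # 1024.0

    print(usable_ram >= requested_ram)  # True: the host still passes
    print(memory_mb_limit)  # 4096.0, recorded as limits['memory_mb']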
nova/scheduler/host_manager.py
@@ -27,14 +27,7 @@ from nova.openstack.common import log as logging
 from nova.openstack.common import timeutils
 from nova.scheduler import filters
 
-
 host_manager_opts = [
-    cfg.IntOpt('reserved_host_disk_mb',
-               default=0,
-               help='Amount of disk in MB to reserve for host/dom0'),
-    cfg.IntOpt('reserved_host_memory_mb',
-               default=512,
-               help='Amount of memory in MB to reserve for host/dom0'),
     cfg.MultiStrOpt('scheduler_available_filters',
             default=['nova.scheduler.filters.standard_filters'],
             help='Filter classes available to the scheduler which may '
@@ -112,32 +105,31 @@ class HostState(object):
         self.service = ReadOnlyDict(service)
         # Mutable available resources.
         # These will change as resources are virtually "consumed".
+        self.total_usable_disk_gb = 0
+        self.disk_mb_used = 0
         self.free_ram_mb = 0
         self.free_disk_mb = 0
         self.vcpus_total = 0
         self.vcpus_used = 0
 
+        # Resource oversubscription values for the compute host:
+        self.limits = {}
+
     def update_from_compute_node(self, compute):
         """Update information about a host from its compute_node info."""
-        all_disk_mb = compute['local_gb'] * 1024
         all_ram_mb = compute['memory_mb']
 
         # Assume virtual size is all consumed by instances if use qcow2 disk.
         least = compute.get('disk_available_least')
         free_disk_mb = least if least is not None else compute['free_disk_gb']
         free_disk_mb *= 1024
-        free_ram_mb = compute['free_ram_mb']
 
-        if FLAGS.reserved_host_disk_mb > 0:
-            all_disk_mb -= FLAGS.reserved_host_disk_mb
-            free_disk_mb -= FLAGS.reserved_host_disk_mb
-        if FLAGS.reserved_host_memory_mb > 0:
-            all_ram_mb -= FLAGS.reserved_host_memory_mb
-            free_ram_mb -= FLAGS.reserved_host_memory_mb
+        self.disk_mb_used = compute['local_gb_used'] * 1024
 
         #NOTE(jogo) free_ram_mb can be negative
-        self.free_ram_mb = free_ram_mb
+        self.free_ram_mb = compute['free_ram_mb']
         self.total_usable_ram_mb = all_ram_mb
+        self.total_usable_disk_gb = compute['local_gb']
         self.free_disk_mb = free_disk_mb
         self.vcpus_total = compute['vcpus']
         self.vcpus_used = compute['vcpus_used']
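The reworked update_from_compute_node above can be replayed standalone against host1's fixture values from the next hunk (illustration only; the free_disk_gb fallback value here is made up and unused because disk_available_least is present):

    compute = dict(local_gb=1024, memory_mb=1024, free_ram_mb=512,
                   disk_available_least=512, free_disk_gb=512,
                   local_gb_used=0)

    least = compute.get('disk_available_least')
    free_disk_mb = least if least is not None else compute['free_disk_gb']
    free_disk_mb *= 1024

    print(free_disk_mb)                     # 524288 (512 GB)
    print(compute['local_gb_used'] * 1024)  # 0 -> disk_mb_used
    print(compute['free_ram_mb'])           # 512: no reserved_* deduction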
nova/tests/scheduler/fakes.py
@@ -28,16 +28,20 @@ from nova.scheduler import host_manager
 COMPUTE_NODES = [
         dict(id=1, local_gb=1024, memory_mb=1024, vcpus=1,
              disk_available_least=512, free_ram_mb=512, vcpus_used=1,
-             free_disk_mb=512, service=dict(host='host1', disabled=False)),
+             free_disk_mb=512, local_gb_used=0,
+             service=dict(host='host1', disabled=False)),
         dict(id=2, local_gb=2048, memory_mb=2048, vcpus=2,
              disk_available_least=1024, free_ram_mb=1024, vcpus_used=2,
-             free_disk_mb=1024, service=dict(host='host2', disabled=True)),
+             free_disk_mb=1024, local_gb_used=0,
+             service=dict(host='host2', disabled=True)),
         dict(id=3, local_gb=4096, memory_mb=4096, vcpus=4,
              disk_available_least=3072, free_ram_mb=3072, vcpus_used=1,
-             free_disk_mb=3072, service=dict(host='host3', disabled=False)),
+             free_disk_mb=3072, local_gb_used=0,
+             service=dict(host='host3', disabled=False)),
         dict(id=4, local_gb=8192, memory_mb=8192, vcpus=8,
              disk_available_least=8192, free_ram_mb=8192, vcpus_used=0,
-             free_disk_mb=8192, service=dict(host='host4', disabled=False)),
+             free_disk_mb=8192, local_gb_used=0,
+             service=dict(host='host4', disabled=False)),
         # Broken entry
         dict(id=5, local_gb=1024, memory_mb=1024, vcpus=1, service=None),
 ]
nova/tests/scheduler/test_filter_scheduler.py
@@ -216,7 +216,6 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         self.assertEqual(info['called'], 0)
 
     def test_get_cost_functions(self):
-        self.flags(reserved_host_memory_mb=128)
         fixture = fakes.FakeFilterScheduler()
         fns = fixture.get_cost_functions()
         self.assertEquals(len(fns), 1)
@@ -225,8 +224,9 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         hostinfo = host_manager.HostState('host', 'compute')
         hostinfo.update_from_compute_node(dict(memory_mb=1000,
                 local_gb=0, vcpus=1, disk_available_least=1000,
-                free_disk_mb=1000, free_ram_mb=1000, vcpus_used=0))
-        self.assertEquals(1000 - 128, fn(hostinfo, {}))
+                free_disk_mb=1000, free_ram_mb=872, vcpus_used=0,
+                local_gb_used=0))
+        self.assertEquals(872, fn(hostinfo, {}))
 
     def test_max_attempts(self):
         self.flags(scheduler_max_attempts=4)
nova/tests/scheduler/test_host_filters.py
@@ -514,6 +514,18 @@ class HostFiltersTestCase(test.TestCase):
                     'capabilities': capabilities, 'service': service})
         self.assertFalse(filt_cls.host_passes(host, filter_properties))
 
+    def test_ram_filter_passes(self):
+        self._stub_service_is_up(True)
+        filt_cls = self.class_map['RamFilter']()
+        self.flags(ram_allocation_ratio=1.0)
+        filter_properties = {'instance_type': {'memory_mb': 1024}}
+        capabilities = {'enabled': True}
+        service = {'disabled': False}
+        host = fakes.FakeHostState('host1', 'compute',
+                {'free_ram_mb': 1024, 'total_usable_ram_mb': 1024,
+                 'capabilities': capabilities, 'service': service})
+        self.assertTrue(filt_cls.host_passes(host, filter_properties))
+
     def test_ram_filter_oversubscribe(self):
         self._stub_service_is_up(True)
         filt_cls = self.class_map['RamFilter']()
@@ -525,24 +537,62 @@ class HostFiltersTestCase(test.TestCase):
                 {'free_ram_mb': -1024, 'total_usable_ram_mb': 2048,
                  'capabilities': capabilities, 'service': service})
         self.assertTrue(filt_cls.host_passes(host, filter_properties))
+        self.assertEqual(2048 * 2.0, host.limits['memory_mb'])
 
-    def test_ram_filter_sets_memory_limit(self):
-        """Test that ram filter sets a filter_property denoting the memory
-        ceiling.
-        """
+    def test_disk_filter_passes(self):
         self._stub_service_is_up(True)
-        filt_cls = self.class_map['RamFilter']()
-        self.flags(ram_allocation_ratio=2.0)
-        filter_properties = {'instance_type': {'memory_mb': 1024}}
+        filt_cls = self.class_map['DiskFilter']()
+        self.flags(disk_allocation_ratio=1.0)
+        filter_properties = {'instance_type': {'root_gb': 1,
+            'ephemeral_gb': 1}}
         capabilities = {'enabled': True}
         service = {'disabled': False}
         host = fakes.FakeHostState('host1', 'compute',
-                {'free_ram_mb': -1024, 'total_usable_ram_mb': 2048,
+                {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 13,
                  'capabilities': capabilities, 'service': service})
-        filt_cls.host_passes(host, filter_properties)
-
-        self.assertEqual(host.total_usable_ram_mb * 2.0,
-                filter_properties['memory_mb_limit'])
+        self.assertTrue(filt_cls.host_passes(host, filter_properties))
+
+    def test_disk_filter_fails(self):
+        self._stub_service_is_up(True)
+        filt_cls = self.class_map['DiskFilter']()
+        self.flags(disk_allocation_ratio=1.0)
+        filter_properties = {'instance_type': {'root_gb': 11,
+            'ephemeral_gb': 1}}
+        capabilities = {'enabled': True}
+        service = {'disabled': False}
+        host = fakes.FakeHostState('host1', 'compute',
+                {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 13,
+                 'capabilities': capabilities, 'service': service})
+        self.assertFalse(filt_cls.host_passes(host, filter_properties))
+
+    def test_disk_filter_oversubscribe(self):
+        self._stub_service_is_up(True)
+        filt_cls = self.class_map['DiskFilter']()
+        self.flags(disk_allocation_ratio=10.0)
+        filter_properties = {'instance_type': {'root_gb': 100,
+            'ephemeral_gb': 19}}
+        capabilities = {'enabled': True}
+        service = {'disabled': False}
+        # 1GB used... so 119GB allowed...
+        host = fakes.FakeHostState('host1', 'compute',
+                {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 12,
+                 'capabilities': capabilities, 'service': service})
+        self.assertTrue(filt_cls.host_passes(host, filter_properties))
+        self.assertEqual(12 * 10.0, host.limits['disk_gb'])
+
+    def test_disk_filter_oversubscribe_fail(self):
+        self._stub_service_is_up(True)
+        filt_cls = self.class_map['DiskFilter']()
+        self.flags(disk_allocation_ratio=10.0)
+        filter_properties = {'instance_type': {'root_gb': 100,
+            'ephemeral_gb': 20}}
+        capabilities = {'enabled': True}
+        service = {'disabled': False}
+        # 1GB used... so 119GB allowed...
+        host = fakes.FakeHostState('host1', 'compute',
+                {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 12,
+                 'capabilities': capabilities, 'service': service})
+        self.assertFalse(filt_cls.host_passes(host, filter_properties))
 
     def test_compute_filter_fails_on_service_disabled(self):
         self._stub_service_is_up(True)
nova/tests/scheduler/test_host_manager.py
@@ -122,8 +122,6 @@ class HostManagerTestCase(test.TestCase):
         self.assertDictMatch(service_states, expected)
 
     def test_get_all_host_states(self):
-        self.flags(reserved_host_memory_mb=512,
-                   reserved_host_disk_mb=1024)
 
         context = 'fake_context'
         topic = 'compute'
@@ -145,18 +143,18 @@ class HostManagerTestCase(test.TestCase):
             host = compute_node['service']['host']
             self.assertEqual(host_states[host].service,
                              compute_node['service'])
-        self.assertEqual(host_states['host1'].free_ram_mb, 0)
+        self.assertEqual(host_states['host1'].free_ram_mb, 512)
         # 511GB
-        self.assertEqual(host_states['host1'].free_disk_mb, 523264)
-        self.assertEqual(host_states['host2'].free_ram_mb, 512)
+        self.assertEqual(host_states['host1'].free_disk_mb, 524288)
+        self.assertEqual(host_states['host2'].free_ram_mb, 1024)
         # 1023GB
-        self.assertEqual(host_states['host2'].free_disk_mb, 1047552)
-        self.assertEqual(host_states['host3'].free_ram_mb, 2560)
+        self.assertEqual(host_states['host2'].free_disk_mb, 1048576)
+        self.assertEqual(host_states['host3'].free_ram_mb, 3072)
         # 3071GB
-        self.assertEqual(host_states['host3'].free_disk_mb, 3144704)
-        self.assertEqual(host_states['host4'].free_ram_mb, 7680)
+        self.assertEqual(host_states['host3'].free_disk_mb, 3145728)
+        self.assertEqual(host_states['host4'].free_ram_mb, 8192)
         # 8191GB
-        self.assertEqual(host_states['host4'].free_disk_mb, 8387584)
+        self.assertEqual(host_states['host4'].free_disk_mb, 8388608)
 
 
 class HostStateTestCase(test.TestCase):
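The changed expectations reduce to a GB-to-MB conversion of each fixture's disk_available_least, now that the scheduler no longer subtracts the reserved_host_* flags (standalone check):

    for least_gb, expected_mb in [(512, 524288), (1024, 1048576),
                                  (3072, 3145728), (8192, 8388608)]:
        assert least_gb * 1024 == expected_mb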