From a187d8885efac1409dc26fa037a915dea88b7120 Mon Sep 17 00:00:00 2001 From: Brian Elliott Date: Fri, 14 Sep 2012 15:17:07 +0000 Subject: [PATCH] Fix bugs in resource tracker and cleanup Fixes bugs in resource tracker: * Handle disk oversubscription * Handle suspended/powered off instances The usage model is changed to the old style that is based on actual instance usage on a compute host. (Not the current point in time of the hypervisor's reported host stats) There is now a 'limits' filter property that can be passed from the scheduler to the compute node to indicate that oversubscription of resources is desired: The 'limits' filter property is a dict with the following possible keys: * memory_mb - Specifies the memory ceiling for the compute node. * disk_gb - Specifies the disk space ceiling for the compute node. * vcpu - Specifies the max number of vcpus for the compute node. There is also some general cleanup and additional unit tests in an attempt to simplify down this function. bug 1048842 bug 1052157 Change-Id: I6ee851b8c03234a78a64d9f5c494dfc7059cdda4 --- nova/scheduler/filter_scheduler.py | 6 ++ nova/scheduler/filters/core_filter.py | 6 ++ nova/scheduler/filters/disk_filter.py | 54 ++++++++++++++ nova/scheduler/filters/ram_filter.py | 10 ++- nova/scheduler/host_manager.py | 24 +++---- nova/tests/scheduler/fakes.py | 12 ++-- nova/tests/scheduler/test_filter_scheduler.py | 6 +- nova/tests/scheduler/test_host_filters.py | 72 ++++++++++++++++--- nova/tests/scheduler/test_host_manager.py | 18 +++-- 9 files changed, 158 insertions(+), 50 deletions(-) create mode 100644 nova/scheduler/filters/disk_filter.py diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py index 371aebf53..b2928177d 100644 --- a/nova/scheduler/filter_scheduler.py +++ b/nova/scheduler/filter_scheduler.py @@ -132,6 +132,9 @@ class FilterScheduler(driver.Scheduler): # Add a retry entry for the selected compute host: self._add_retry_host(filter_properties, 
weighted_host.host_state.host) + self._add_oversubscription_policy(filter_properties, + weighted_host.host_state) + payload = dict(request_spec=request_spec, weighted_host=weighted_host.to_dict(), instance_id=instance_uuid) @@ -160,6 +163,9 @@ class FilterScheduler(driver.Scheduler): hosts = retry['hosts'] hosts.append(host) + def _add_oversubscription_policy(self, filter_properties, host_state): + filter_properties['limits'] = host_state.limits + def _get_configuration_options(self): """Fetch options dictionary. Broken out for testing.""" return self.options.get_configuration() diff --git a/nova/scheduler/filters/core_filter.py b/nova/scheduler/filters/core_filter.py index 5af68bc9f..98b0930d9 100644 --- a/nova/scheduler/filters/core_filter.py +++ b/nova/scheduler/filters/core_filter.py @@ -47,4 +47,10 @@ class CoreFilter(filters.BaseHostFilter): instance_vcpus = instance_type['vcpus'] vcpus_total = host_state.vcpus_total * FLAGS.cpu_allocation_ratio + + # Only provide a VCPU limit to compute if the virt driver is reporting + # an accurate count of installed VCPUs. (XenServer driver does not) + if vcpus_total > 0: + host_state.limits['vcpu'] = vcpus_total + return (vcpus_total - host_state.vcpus_used) >= instance_vcpus diff --git a/nova/scheduler/filters/disk_filter.py b/nova/scheduler/filters/disk_filter.py new file mode 100644 index 000000000..88b8c3377 --- /dev/null +++ b/nova/scheduler/filters/disk_filter.py @@ -0,0 +1,54 @@ +# Copyright (c) 2012 OpenStack, LLC. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +from nova import flags +from nova.openstack.common import cfg +from nova.openstack.common import log as logging +from nova.scheduler import filters + +LOG = logging.getLogger(__name__) + +disk_allocation_ratio_opt = cfg.FloatOpt("disk_allocation_ratio", default=1.0, + help="virtual disk to physical disk allocation ratio") + +FLAGS = flags.FLAGS +FLAGS.register_opt(disk_allocation_ratio_opt) + + +class DiskFilter(filters.BaseHostFilter): + """Disk Filter with over subscription flag""" + + def host_passes(self, host_state, filter_properties): + """Filter based on disk usage""" + instance_type = filter_properties.get('instance_type') + requested_disk = 1024 * (instance_type['root_gb'] + + instance_type['ephemeral_gb']) + + free_disk_mb = host_state.free_disk_mb + total_usable_disk_mb = host_state.total_usable_disk_gb * 1024 + + disk_mb_limit = total_usable_disk_mb * FLAGS.disk_allocation_ratio + used_disk_mb = total_usable_disk_mb - free_disk_mb + usable_disk_mb = disk_mb_limit - used_disk_mb + + if not usable_disk_mb >= requested_disk: + LOG.debug(_("%(host_state)s does not have %(requested_disk)s MB " + "usable disk, it only has %(usable_disk_mb)s MB usable " + "disk."), locals()) + return False + + disk_gb_limit = disk_mb_limit / 1024 + host_state.limits['disk_gb'] = disk_gb_limit + return True diff --git a/nova/scheduler/filters/ram_filter.py b/nova/scheduler/filters/ram_filter.py index 8fb89bf5b..22ba0252c 100644 --- a/nova/scheduler/filters/ram_filter.py +++ b/nova/scheduler/filters/ram_filter.py @@ -39,17 +39,15 @@ class RamFilter(filters.BaseHostFilter): free_ram_mb = host_state.free_ram_mb total_usable_ram_mb = host_state.total_usable_ram_mb - oversubscribed_ram_limit_mb = (total_usable_ram_mb * - FLAGS.ram_allocation_ratio) + memory_mb_limit = total_usable_ram_mb * FLAGS.ram_allocation_ratio used_ram_mb = total_usable_ram_mb - free_ram_mb - usable_ram 
= oversubscribed_ram_limit_mb - used_ram_mb + usable_ram = memory_mb_limit - used_ram_mb if not usable_ram >= requested_ram: LOG.debug(_("%(host_state)s does not have %(requested_ram)s MB " "usable ram, it only has %(usable_ram)s MB usable ram."), locals()) return False - # save oversubscribe ram limit so the compute host can verify - # memory availability on builds: - filter_properties['memory_mb_limit'] = oversubscribed_ram_limit_mb + # save oversubscription limit for compute node to test against: + host_state.limits['memory_mb'] = memory_mb_limit return True diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py index 695189881..7b6192c01 100644 --- a/nova/scheduler/host_manager.py +++ b/nova/scheduler/host_manager.py @@ -27,14 +27,7 @@ from nova.openstack.common import log as logging from nova.openstack.common import timeutils from nova.scheduler import filters - host_manager_opts = [ - cfg.IntOpt('reserved_host_disk_mb', - default=0, - help='Amount of disk in MB to reserve for host/dom0'), - cfg.IntOpt('reserved_host_memory_mb', - default=512, - help='Amount of memory in MB to reserve for host/dom0'), cfg.MultiStrOpt('scheduler_available_filters', default=['nova.scheduler.filters.standard_filters'], help='Filter classes available to the scheduler which may ' @@ -112,32 +105,31 @@ class HostState(object): self.service = ReadOnlyDict(service) # Mutable available resources. # These will change as resources are virtually "consumed". + self.total_usable_disk_gb = 0 + self.disk_mb_used = 0 self.free_ram_mb = 0 self.free_disk_mb = 0 self.vcpus_total = 0 self.vcpus_used = 0 + # Resource oversubscription values for the compute host: + self.limits = {} + def update_from_compute_node(self, compute): """Update information about a host from its compute_node info.""" - all_disk_mb = compute['local_gb'] * 1024 all_ram_mb = compute['memory_mb'] # Assume virtual size is all consumed by instances if use qcow2 disk. 
least = compute.get('disk_available_least') free_disk_mb = least if least is not None else compute['free_disk_gb'] free_disk_mb *= 1024 - free_ram_mb = compute['free_ram_mb'] - if FLAGS.reserved_host_disk_mb > 0: - all_disk_mb -= FLAGS.reserved_host_disk_mb - free_disk_mb -= FLAGS.reserved_host_disk_mb - if FLAGS.reserved_host_memory_mb > 0: - all_ram_mb -= FLAGS.reserved_host_memory_mb - free_ram_mb -= FLAGS.reserved_host_memory_mb + self.disk_mb_used = compute['local_gb_used'] * 1024 #NOTE(jogo) free_ram_mb can be negative - self.free_ram_mb = free_ram_mb + self.free_ram_mb = compute['free_ram_mb'] self.total_usable_ram_mb = all_ram_mb + self.total_usable_disk_gb = compute['local_gb'] self.free_disk_mb = free_disk_mb self.vcpus_total = compute['vcpus'] self.vcpus_used = compute['vcpus_used'] diff --git a/nova/tests/scheduler/fakes.py b/nova/tests/scheduler/fakes.py index 27341a62c..29466fbe1 100644 --- a/nova/tests/scheduler/fakes.py +++ b/nova/tests/scheduler/fakes.py @@ -28,16 +28,20 @@ from nova.scheduler import host_manager COMPUTE_NODES = [ dict(id=1, local_gb=1024, memory_mb=1024, vcpus=1, disk_available_least=512, free_ram_mb=512, vcpus_used=1, - free_disk_mb=512, service=dict(host='host1', disabled=False)), + free_disk_mb=512, local_gb_used=0, + service=dict(host='host1', disabled=False)), dict(id=2, local_gb=2048, memory_mb=2048, vcpus=2, disk_available_least=1024, free_ram_mb=1024, vcpus_used=2, - free_disk_mb=1024, service=dict(host='host2', disabled=True)), + free_disk_mb=1024, local_gb_used=0, + service=dict(host='host2', disabled=True)), dict(id=3, local_gb=4096, memory_mb=4096, vcpus=4, disk_available_least=3072, free_ram_mb=3072, vcpus_used=1, - free_disk_mb=3072, service=dict(host='host3', disabled=False)), + free_disk_mb=3072, local_gb_used=0, + service=dict(host='host3', disabled=False)), dict(id=4, local_gb=8192, memory_mb=8192, vcpus=8, disk_available_least=8192, free_ram_mb=8192, vcpus_used=0, - free_disk_mb=8192, service=dict(host='host4', 
disabled=False)), + free_disk_mb=8192, local_gb_used=0, + service=dict(host='host4', disabled=False)), # Broken entry dict(id=5, local_gb=1024, memory_mb=1024, vcpus=1, service=None), ] diff --git a/nova/tests/scheduler/test_filter_scheduler.py b/nova/tests/scheduler/test_filter_scheduler.py index 130251a0f..9e78f3a50 100644 --- a/nova/tests/scheduler/test_filter_scheduler.py +++ b/nova/tests/scheduler/test_filter_scheduler.py @@ -216,7 +216,6 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase): self.assertEqual(info['called'], 0) def test_get_cost_functions(self): - self.flags(reserved_host_memory_mb=128) fixture = fakes.FakeFilterScheduler() fns = fixture.get_cost_functions() self.assertEquals(len(fns), 1) @@ -225,8 +224,9 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase): hostinfo = host_manager.HostState('host', 'compute') hostinfo.update_from_compute_node(dict(memory_mb=1000, local_gb=0, vcpus=1, disk_available_least=1000, - free_disk_mb=1000, free_ram_mb=1000, vcpus_used=0)) - self.assertEquals(1000 - 128, fn(hostinfo, {})) + free_disk_mb=1000, free_ram_mb=872, vcpus_used=0, + local_gb_used=0)) + self.assertEquals(872, fn(hostinfo, {})) def test_max_attempts(self): self.flags(scheduler_max_attempts=4) diff --git a/nova/tests/scheduler/test_host_filters.py b/nova/tests/scheduler/test_host_filters.py index 19b008212..42d43a197 100644 --- a/nova/tests/scheduler/test_host_filters.py +++ b/nova/tests/scheduler/test_host_filters.py @@ -514,6 +514,18 @@ class HostFiltersTestCase(test.TestCase): 'capabilities': capabilities, 'service': service}) self.assertFalse(filt_cls.host_passes(host, filter_properties)) + def test_ram_filter_passes(self): + self._stub_service_is_up(True) + filt_cls = self.class_map['RamFilter']() + self.flags(ram_allocation_ratio=1.0) + filter_properties = {'instance_type': {'memory_mb': 1024}} + capabilities = {'enabled': True} + service = {'disabled': False} + host = fakes.FakeHostState('host1', 'compute', + 
{'free_ram_mb': 1024, 'total_usable_ram_mb': 1024, + 'capabilities': capabilities, 'service': service}) + self.assertTrue(filt_cls.host_passes(host, filter_properties)) + def test_ram_filter_oversubscribe(self): self._stub_service_is_up(True) filt_cls = self.class_map['RamFilter']() @@ -525,24 +537,62 @@ class HostFiltersTestCase(test.TestCase): {'free_ram_mb': -1024, 'total_usable_ram_mb': 2048, 'capabilities': capabilities, 'service': service}) self.assertTrue(filt_cls.host_passes(host, filter_properties)) + self.assertEqual(2048 * 2.0, host.limits['memory_mb']) - def test_ram_filter_sets_memory_limit(self): - """Test that ram filter sets a filter_property denoting the memory - ceiling. - """ + def test_disk_filter_passes(self): self._stub_service_is_up(True) - filt_cls = self.class_map['RamFilter']() - self.flags(ram_allocation_ratio=2.0) - filter_properties = {'instance_type': {'memory_mb': 1024}} + filt_cls = self.class_map['DiskFilter']() + self.flags(disk_allocation_ratio=1.0) + filter_properties = {'instance_type': {'root_gb': 1, + 'ephemeral_gb': 1}} capabilities = {'enabled': True} service = {'disabled': False} host = fakes.FakeHostState('host1', 'compute', - {'free_ram_mb': -1024, 'total_usable_ram_mb': 2048, + {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 13, 'capabilities': capabilities, 'service': service}) - filt_cls.host_passes(host, filter_properties) + self.assertTrue(filt_cls.host_passes(host, filter_properties)) - self.assertEqual(host.total_usable_ram_mb * 2.0, - filter_properties['memory_mb_limit']) + def test_disk_filter_fails(self): + self._stub_service_is_up(True) + filt_cls = self.class_map['DiskFilter']() + self.flags(disk_allocation_ratio=1.0) + filter_properties = {'instance_type': {'root_gb': 11, + 'ephemeral_gb': 1}} + capabilities = {'enabled': True} + service = {'disabled': False} + host = fakes.FakeHostState('host1', 'compute', + {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 13, + 'capabilities': capabilities, 'service': 
service}) + self.assertFalse(filt_cls.host_passes(host, filter_properties)) + + def test_disk_filter_oversubscribe(self): + self._stub_service_is_up(True) + filt_cls = self.class_map['DiskFilter']() + self.flags(disk_allocation_ratio=10.0) + filter_properties = {'instance_type': {'root_gb': 100, + 'ephemeral_gb': 19}} + capabilities = {'enabled': True} + service = {'disabled': False} + # 1GB used... so 119GB allowed... + host = fakes.FakeHostState('host1', 'compute', + {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 12, + 'capabilities': capabilities, 'service': service}) + self.assertTrue(filt_cls.host_passes(host, filter_properties)) + self.assertEqual(12 * 10.0, host.limits['disk_gb']) + + def test_disk_filter_oversubscribe_fail(self): + self._stub_service_is_up(True) + filt_cls = self.class_map['DiskFilter']() + self.flags(disk_allocation_ratio=10.0) + filter_properties = {'instance_type': {'root_gb': 100, + 'ephemeral_gb': 20}} + capabilities = {'enabled': True} + service = {'disabled': False} + # 1GB used... so 119GB allowed... 
+ host = fakes.FakeHostState('host1', 'compute', + {'free_disk_mb': 11 * 1024, 'total_usable_disk_gb': 12, + 'capabilities': capabilities, 'service': service}) + self.assertFalse(filt_cls.host_passes(host, filter_properties)) def test_compute_filter_fails_on_service_disabled(self): self._stub_service_is_up(True) diff --git a/nova/tests/scheduler/test_host_manager.py b/nova/tests/scheduler/test_host_manager.py index a2f9fc425..2ca8f3ad9 100644 --- a/nova/tests/scheduler/test_host_manager.py +++ b/nova/tests/scheduler/test_host_manager.py @@ -122,8 +122,6 @@ class HostManagerTestCase(test.TestCase): self.assertDictMatch(service_states, expected) def test_get_all_host_states(self): - self.flags(reserved_host_memory_mb=512, - reserved_host_disk_mb=1024) context = 'fake_context' topic = 'compute' @@ -145,18 +143,18 @@ class HostManagerTestCase(test.TestCase): host = compute_node['service']['host'] self.assertEqual(host_states[host].service, compute_node['service']) - self.assertEqual(host_states['host1'].free_ram_mb, 0) + self.assertEqual(host_states['host1'].free_ram_mb, 512) # 511GB - self.assertEqual(host_states['host1'].free_disk_mb, 523264) - self.assertEqual(host_states['host2'].free_ram_mb, 512) + self.assertEqual(host_states['host1'].free_disk_mb, 524288) + self.assertEqual(host_states['host2'].free_ram_mb, 1024) # 1023GB - self.assertEqual(host_states['host2'].free_disk_mb, 1047552) - self.assertEqual(host_states['host3'].free_ram_mb, 2560) + self.assertEqual(host_states['host2'].free_disk_mb, 1048576) + self.assertEqual(host_states['host3'].free_ram_mb, 3072) # 3071GB - self.assertEqual(host_states['host3'].free_disk_mb, 3144704) - self.assertEqual(host_states['host4'].free_ram_mb, 7680) + self.assertEqual(host_states['host3'].free_disk_mb, 3145728) + self.assertEqual(host_states['host4'].free_ram_mb, 8192) # 8191GB - self.assertEqual(host_states['host4'].free_disk_mb, 8387584) + self.assertEqual(host_states['host4'].free_disk_mb, 8388608) class 
HostStateTestCase(test.TestCase):