Make HostManager track NUMA usage

This patch adds tracking of NUMA usage to the scheduler's HostManager
class by storing it on HostState and making sure it is updated along
with all the other usage data.

Change-Id: Id31c9a7fd54b918b3d38abb61b305cf7048a6a7d
Blueprint: virt-driver-numa-placement
Nikola Dipanov 2014-08-18 12:35:23 +02:00 committed by Dan Smith
parent 5358480699
commit 80b97fba33
5 changed files with 76 additions and 44 deletions
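
Once HostState carries numa_topology, scheduler filters can consult the
tracked usage when deciding whether a host fits an instance. The snippet
below is a minimal illustrative sketch of a filter consuming that field;
it is not part of this change, host_passes is the standard BaseHostFilter
hook, and the per-cell attribute names memory and memory_usage are
assumptions based on how VirtNUMATopologyCellUsage is used in the
fixtures further down.

    # Hypothetical filter: only meant to show how the tracked NUMA usage
    # on HostState can be read.  Not part of this change.
    from nova.scheduler import filters
    from nova.virt import hardware

    class ExampleNUMAUsageFilter(filters.BaseHostFilter):
        def host_passes(self, host_state, filter_properties):
            if not host_state.numa_topology:
                # Host does not report a NUMA topology; defer to other filters.
                return True
            topology = hardware.VirtNUMAHostTopology.from_json(
                host_state.numa_topology)
            # memory / memory_usage per cell are assumed attribute names.
            return any(cell.memory > cell.memory_usage
                       for cell in topology.cells)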


@@ -34,6 +34,7 @@ from nova.openstack.common import timeutils
from nova.pci import pci_stats
from nova.scheduler import filters
from nova.scheduler import weights
from nova.virt import hardware
host_manager_opts = [
cfg.MultiStrOpt('scheduler_available_filters',
@@ -122,6 +123,7 @@ class HostState(object):
self.free_disk_mb = 0
self.vcpus_total = 0
self.vcpus_used = 0
self.numa_topology = None
# Additional host information from the compute node stats:
self.num_instances = 0
@@ -197,6 +199,7 @@ class HostState(object):
self.vcpus_total = compute['vcpus']
self.vcpus_used = compute['vcpus_used']
self.updated = compute['updated_at']
self.numa_topology = compute['numa_topology']
if 'pci_stats' in compute:
self.pci_stats = pci_stats.PciDeviceStats(compute['pci_stats'])
else:
@@ -244,6 +247,11 @@ class HostState(object):
if pci_requests.requests and self.pci_stats:
self.pci_stats.apply_requests(pci_requests.requests)
# Calculate the numa usage
updated_numa_topology = hardware.get_host_numa_usage_from_instance(
self, instance)
self.numa_topology = updated_numa_topology
vm_state = instance.get('vm_state', vm_states.BUILDING)
task_state = instance.get('task_state')
if vm_state == vm_states.BUILDING or task_state in [
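
The net effect of this hunk is that every call to consume_from_instance
now refreshes host.numa_topology from
hardware.get_host_numa_usage_from_instance(host, instance), exactly as
the HostState tests below assert. A minimal sketch of the resulting call
shape, where compute, instance and context are placeholders shaped like
the test fixtures:

    # Sketch only: 'compute' is assumed to carry a 'numa_topology' JSON
    # blob and 'instance' a scheduler instance dict, as in the tests below.
    host = host_manager.HostState('fakehost', 'fakenode')
    host.update_from_compute_node(compute)         # seeds host.numa_topology
    host.consume_from_instance(context, instance)  # replaces it with the value
    # returned by hardware.get_host_numa_usage_from_instance(host, instance)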


@@ -23,7 +23,13 @@ from nova import db
from nova.openstack.common import jsonutils
from nova.scheduler import filter_scheduler
from nova.scheduler import host_manager
from nova.virt import hardware
NUMA_TOPOLOGY = hardware.VirtNUMAHostTopology(
cells=[hardware.VirtNUMATopologyCellUsage(
0, set([1, 2]), 512),
hardware.VirtNUMATopologyCellUsage(
1, set([3, 4]), 512)])
COMPUTE_NODES = [
dict(id=1, local_gb=1024, memory_mb=1024, vcpus=1,
@@ -31,25 +37,25 @@ COMPUTE_NODES = [
free_disk_gb=512, local_gb_used=0, updated_at=None,
service=dict(host='host1', disabled=False),
hypervisor_hostname='node1', host_ip='127.0.0.1',
hypervisor_version=0),
hypervisor_version=0, numa_topology=None),
dict(id=2, local_gb=2048, memory_mb=2048, vcpus=2,
disk_available_least=1024, free_ram_mb=1024, vcpus_used=2,
free_disk_gb=1024, local_gb_used=0, updated_at=None,
service=dict(host='host2', disabled=True),
hypervisor_hostname='node2', host_ip='127.0.0.1',
hypervisor_version=0),
hypervisor_version=0, numa_topology=None),
dict(id=3, local_gb=4096, memory_mb=4096, vcpus=4,
disk_available_least=3333, free_ram_mb=3072, vcpus_used=1,
free_disk_gb=3072, local_gb_used=0, updated_at=None,
service=dict(host='host3', disabled=False),
hypervisor_hostname='node3', host_ip='127.0.0.1',
hypervisor_version=0),
hypervisor_version=0, numa_topology=NUMA_TOPOLOGY.to_json()),
dict(id=4, local_gb=8192, memory_mb=8192, vcpus=8,
disk_available_least=8192, free_ram_mb=8192, vcpus_used=0,
free_disk_gb=8888, local_gb_used=0, updated_at=None,
service=dict(host='host4', disabled=False),
hypervisor_hostname='node4', host_ip='127.0.0.1',
hypervisor_version=0),
hypervisor_version=0, numa_topology=None),
# Broken entry
dict(id=5, local_gb=1024, memory_mb=1024, vcpus=1, service=None),
]
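
Only the host3 entry carries a topology here, stored as the JSON blob a
compute node would report. The HostManager test further down recovers it
with from_json and compares dictionaries; a minimal sketch of that round
trip, using only the helpers that appear elsewhere in this change:

    # Round trip relied on by the host3 assertion in the HostManager test.
    blob = NUMA_TOPOLOGY.to_json()
    restored = hardware.VirtNUMAHostTopology.from_json(blob)
    assert restored._to_dict() == NUMA_TOPOLOGY._to_dict()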
@@ -60,7 +66,7 @@ COMPUTE_NODES_METRICS = [
free_disk_gb=512, local_gb_used=0, updated_at=None,
service=dict(host='host1', disabled=False),
hypervisor_hostname='node1', host_ip='127.0.0.1',
hypervisor_version=0,
hypervisor_version=0, numa_topology=None,
metrics=jsonutils.dumps([{'name': 'foo',
'value': 512,
'timestamp': None,
@@ -77,7 +83,7 @@ COMPUTE_NODES_METRICS = [
free_disk_gb=1024, local_gb_used=0, updated_at=None,
service=dict(host='host2', disabled=True),
hypervisor_hostname='node2', host_ip='127.0.0.1',
hypervisor_version=0,
hypervisor_version=0, numa_topology=None,
metrics=jsonutils.dumps([{'name': 'foo',
'value': 1024,
'timestamp': None,
@@ -94,7 +100,7 @@ COMPUTE_NODES_METRICS = [
free_disk_gb=3072, local_gb_used=0, updated_at=None,
service=dict(host='host3', disabled=False),
hypervisor_hostname='node3', host_ip='127.0.0.1',
hypervisor_version=0,
hypervisor_version=0, numa_topology=None,
metrics=jsonutils.dumps([{'name': 'foo',
'value': 3072,
'timestamp': None,
@@ -111,7 +117,7 @@ COMPUTE_NODES_METRICS = [
free_disk_gb=8192, local_gb_used=0, updated_at=None,
service=dict(host='host4', disabled=False),
hypervisor_hostname='node4', host_ip='127.0.0.1',
hypervisor_version=0,
hypervisor_version=0, numa_topology=None,
metrics=jsonutils.dumps([{'name': 'foo',
'value': 8192,
'timestamp': None,
@@ -128,7 +134,7 @@ COMPUTE_NODES_METRICS = [
free_disk_gb=768, local_gb_used=0, updated_at=None,
service=dict(host='host5', disabled=False),
hypervisor_hostname='node5', host_ip='127.0.0.1',
hypervisor_version=0,
hypervisor_version=0, numa_topology=None,
metrics=jsonutils.dumps([{'name': 'foo',
'value': 768,
'timestamp': None,
@@ -150,7 +156,7 @@ COMPUTE_NODES_METRICS = [
free_disk_gb=2048, local_gb_used=0, updated_at=None,
service=dict(host='host6', disabled=False),
hypervisor_hostname='node6', host_ip='127.0.0.1',
hypervisor_version=0,
hypervisor_version=0, numa_topology=None,
metrics=jsonutils.dumps([{'name': 'foo',
'value': 2048,
'timestamp': None,


@@ -80,10 +80,10 @@ class CachingSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.driver.select_destinations,
self.context, fake_request_spec, {})
@mock.patch.object(host_manager.objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=host_manager.objects.InstancePCIRequests(requests=[]))
def test_select_destination_works(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_select_destination_works(self, mock_get_extra):
fake_request_spec = self._get_fake_request_spec()
fake_host = self._get_fake_host_state()
self.driver.all_host_states = [fake_host]
@@ -133,10 +133,10 @@ class CachingSchedulerTestCase(test_scheduler.SchedulerTestCase):
}
return host_state
@mock.patch.object(host_manager.objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=host_manager.objects.InstancePCIRequests(requests=[]))
def test_performance_check_select_destination(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_performance_check_select_destination(self, mock_get_extra):
hosts = 2
requests = 1


@@ -154,10 +154,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.driver.schedule_run_instance(fake_context, request_spec,
None, None, None, None, {}, False)
@mock.patch.object(objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=objects.InstancePCIRequests(requests=[]))
def test_schedule_happy_day(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_schedule_happy_day(self, mock_get_extra):
"""Make sure there's nothing glaringly wrong with _schedule()
by doing a happy day pass through.
"""
@@ -459,10 +459,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
self._group_details_in_filter_properties(group, 'get_by_name',
group.name, 'anti-affinity')
@mock.patch.object(objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=objects.InstancePCIRequests(requests=[]))
def test_schedule_host_pool(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_schedule_host_pool(self, mock_get_extra):
"""Make sure the scheduler_host_subset_size property works properly."""
self.flags(scheduler_host_subset_size=2)
@@ -492,10 +492,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
# one host should be chosen
self.assertEqual(len(hosts), 1)
@mock.patch.object(objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=objects.InstancePCIRequests(requests=[]))
def test_schedule_large_host_pool(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_schedule_large_host_pool(self, mock_get_extra):
"""Hosts should still be chosen if pool size
is larger than number of filtered hosts.
"""
@@ -526,10 +526,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
# one host should be chose
self.assertEqual(len(hosts), 1)
@mock.patch.object(objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=objects.InstancePCIRequests(requests=[]))
def test_schedule_chooses_best_host(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_schedule_chooses_best_host(self, mock_get_extra):
"""If scheduler_host_subset_size is 1, the largest host with greatest
weight should be returned.
"""
@@ -576,10 +576,10 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.assertEqual(50, hosts[0].weight)
@mock.patch.object(objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=objects.InstancePCIRequests(requests=[]))
def test_select_destinations(self, mock_pci_req):
@mock.patch('nova.db.instance_extra_get_by_instance_uuid',
return_value={'numa_topology': None,
'pci_requests': None})
def test_select_destinations(self, mock_get_extra):
"""select_destinations is basically a wrapper around _schedule().
Similar to the _schedule tests, this just does a happy path test to


@@ -17,6 +17,7 @@ Tests For HostManager
"""
import mock
import six
from nova.compute import task_states
from nova.compute import vm_states
@@ -27,8 +28,10 @@ from nova.openstack.common import timeutils
from nova.scheduler import filters
from nova.scheduler import host_manager
from nova import test
from nova.tests import matchers
from nova.tests.scheduler import fakes
from nova import utils
from nova.virt import hardware
class FakeFilterClass1(filters.BaseHostFilter):
@@ -311,6 +314,11 @@ class HostManagerTestCase(test.NoDBTestCase):
# 3071GB
self.assertEqual(host_states_map[('host3', 'node3')].free_disk_mb,
3145728)
self.assertThat(
hardware.VirtNUMAHostTopology.from_json(
host_states_map[('host3', 'node3')].numa_topology
)._to_dict(),
matchers.DictMatches(fakes.NUMA_TOPOLOGY._to_dict()))
self.assertEqual(host_states_map[('host4', 'node4')].free_ram_mb,
8192)
# 8191GB
@@ -404,7 +412,7 @@ class HostStateTestCase(test.NoDBTestCase):
hypervisor_type='htype',
hypervisor_hostname='hostname', cpu_info='cpu_info',
supported_instances='{}',
hypervisor_version=hyper_ver_int)
hypervisor_version=hyper_ver_int, numa_topology=None)
host = host_manager.HostState("fakehost", "fakenode")
host.update_from_compute_node(compute)
@@ -439,7 +447,7 @@ class HostStateTestCase(test.NoDBTestCase):
compute = dict(stats=stats, memory_mb=0, free_disk_gb=0, local_gb=0,
local_gb_used=0, free_ram_mb=0, vcpus=0, vcpus_used=0,
updated_at=None, host_ip='127.0.0.1',
hypervisor_version=hyper_ver_int)
hypervisor_version=hyper_ver_int, numa_topology=None)
host = host_manager.HostState("fakehost", "fakenode")
host.update_from_compute_node(compute)
@@ -465,7 +473,7 @@ class HostStateTestCase(test.NoDBTestCase):
compute = dict(stats=stats, memory_mb=0, free_disk_gb=0, local_gb=0,
local_gb_used=0, free_ram_mb=0, vcpus=0, vcpus_used=0,
updated_at=None, host_ip='127.0.0.1',
hypervisor_version=hyper_ver_int)
hypervisor_version=hyper_ver_int, numa_topology=None)
host = host_manager.HostState("fakehost", "fakenode")
host.update_from_compute_node(compute)
@@ -480,7 +488,10 @@ class HostStateTestCase(test.NoDBTestCase):
@mock.patch.object(host_manager.objects.InstancePCIRequests,
'get_by_instance_uuid',
return_value=host_manager.objects.InstancePCIRequests(requests=[]))
def test_stat_consumption_from_instance(self, mock_pci_req):
@mock.patch('nova.virt.hardware.get_host_numa_usage_from_instance')
def test_stat_consumption_from_instance(self, numa_usage_mock,
mock_pci_req):
numa_usage_mock.return_value = 'fake-consumed-once'
host = host_manager.HostState("fakehost", "fakenode")
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
@@ -488,7 +499,10 @@ class HostStateTestCase(test.NoDBTestCase):
task_state=task_states.SCHEDULING, os_type='Linux',
uuid='fake-uuid')
host.consume_from_instance('fake-context', instance)
numa_usage_mock.assert_called_once_with(host, instance)
self.assertEqual('fake-consumed-once', host.numa_topology)
numa_usage_mock.return_value = 'fake-consumed-twice'
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
project_id='12345', vm_state=vm_states.PAUSED,
task_state=None, os_type='Linux',
@@ -497,6 +511,9 @@ class HostStateTestCase(test.NoDBTestCase):
self.assertEqual(2, host.num_instances)
self.assertEqual(1, host.num_io_ops)
self.assertEqual(2, numa_usage_mock.call_count)
self.assertEqual(((host, instance),), numa_usage_mock.call_args)
self.assertEqual('fake-consumed-twice', host.numa_topology)
def test_resources_consumption_from_compute_node(self):
metrics = [
@@ -514,8 +531,8 @@ class HostStateTestCase(test.NoDBTestCase):
memory_mb=0, free_disk_gb=0, local_gb=0,
local_gb_used=0, free_ram_mb=0, vcpus=0, vcpus_used=0,
updated_at=None, host_ip='127.0.0.1',
hypervisor_version=hyper_ver_int)
hypervisor_version=hyper_ver_int,
numa_topology=fakes.NUMA_TOPOLOGY.to_json())
host = host_manager.HostState("fakehost", "fakenode")
host.update_from_compute_node(compute)
@@ -525,3 +542,4 @@ class HostStateTestCase(test.NoDBTestCase):
self.assertEqual('source1', host.metrics['res1'].source)
self.assertEqual('string2', host.metrics['res2'].value)
self.assertEqual('source2', host.metrics['res2'].source)
self.assertIsInstance(host.numa_topology, six.string_types)