Instances with NUMA will be packed onto hosts
This patch makes the NUMATopologyFilter and instance claims on the compute host use instance fitting logic to allow for actually packing instances onto NUMA capable hosts. This also means that the NUMA placement that is calculated during a successful claim will need to be updated in the database to reflect the host NUMA cell ids the instance cells will be pinned to. Using fit_instance_to_host() to decide whether an instance can land on a host makes the NUMATopologyFilter code cleaner as it now fully re-uses all the logic in VirtNUMAHostTopology and VirtNUMATopologyCellUsage classes. Change-Id: Ieabafea73b4d566f4194ca60be38b6415d8a8f3d Closes-bug: #1386236
This commit is contained in:
parent
a59e1a9c7e
commit
53099f3bf2
@ -282,8 +282,7 @@ and try to match it with the topology exposed by the host, accounting for the
|
||||
``ram_allocation_ratio`` and ``cpu_allocation_ratio`` for over-subscription. The
|
||||
filtering is done in the following manner:
|
||||
|
||||
* Filter will try to match the exact NUMA cells of the instance to those of
|
||||
the host. It *will not* attempt to pack the instance onto the host.
|
||||
* Filter will attempt to pack instance cells onto host cells.
|
||||
* It will consider the standard over-subscription limits for each host NUMA cell,
|
||||
and provide limits to the compute host accordingly (as mentioned above).
|
||||
* If instance has no topology defined, it will be considered for any host.
|
||||
|
@ -36,6 +36,7 @@ class NopClaim(object):
|
||||
|
||||
def __init__(self, migration=None):
|
||||
self.migration = migration
|
||||
self.claimed_numa_topology = None
|
||||
|
||||
@property
|
||||
def disk_gb(self):
|
||||
@ -201,13 +202,22 @@ class Claim(NopClaim):
|
||||
|
||||
def _test_numa_topology(self, resources, limit):
|
||||
host_topology = resources.get('numa_topology')
|
||||
if host_topology and limit:
|
||||
requested_topology = (self.numa_topology and
|
||||
self.numa_topology.topology_from_obj())
|
||||
if host_topology:
|
||||
host_topology = hardware.VirtNUMAHostTopology.from_json(
|
||||
host_topology)
|
||||
instances_topology = (
|
||||
[self.numa_topology] if self.numa_topology else [])
|
||||
return hardware.VirtNUMAHostTopology.claim_test(
|
||||
host_topology, instances_topology, limit)
|
||||
instance_topology = (
|
||||
hardware.VirtNUMAHostTopology.fit_instance_to_host(
|
||||
host_topology, requested_topology,
|
||||
limits_topology=limit))
|
||||
if requested_topology and not instance_topology:
|
||||
return (_("Requested instance NUMA topology cannot fit "
|
||||
"the given host NUMA topology"))
|
||||
elif instance_topology:
|
||||
self.claimed_numa_topology = (
|
||||
objects.InstanceNUMATopology.obj_from_topology(
|
||||
instance_topology))
|
||||
|
||||
def _test(self, type_, unit, total, used, requested, limit):
|
||||
"""Test if the given type of resource needed for a claim can be safely
|
||||
@ -264,8 +274,11 @@ class ResizeClaim(Claim):
|
||||
|
||||
@property
|
||||
def numa_topology(self):
|
||||
return hardware.VirtNUMAInstanceTopology.get_constraints(
|
||||
instance_topology = hardware.VirtNUMAInstanceTopology.get_constraints(
|
||||
self.instance_type, self.image_meta)
|
||||
if instance_topology:
|
||||
return objects.InstanceNUMATopology.obj_from_topology(
|
||||
instance_topology)
|
||||
|
||||
def _test_pci(self):
|
||||
pci_requests = objects.InstancePCIRequests.\
|
||||
|
@ -1402,7 +1402,7 @@ class ComputeManager(manager.Manager):
|
||||
rt = self._get_resource_tracker(node)
|
||||
try:
|
||||
limits = filter_properties.get('limits', {})
|
||||
with rt.instance_claim(context, instance, limits):
|
||||
with rt.instance_claim(context, instance, limits) as inst_claim:
|
||||
# NOTE(russellb) It's important that this validation be done
|
||||
# *after* the resource tracker instance claim, as that is where
|
||||
# the host is set on the instance.
|
||||
@ -1422,6 +1422,7 @@ class ComputeManager(manager.Manager):
|
||||
|
||||
instance.vm_state = vm_states.BUILDING
|
||||
instance.task_state = task_states.BLOCK_DEVICE_MAPPING
|
||||
instance.numa_topology = inst_claim.claimed_numa_topology
|
||||
instance.save()
|
||||
|
||||
block_device_info = self._prep_block_device(
|
||||
@ -2089,7 +2090,7 @@ class ComputeManager(manager.Manager):
|
||||
extra_usage_info={'image_name': image_name})
|
||||
try:
|
||||
rt = self._get_resource_tracker(node)
|
||||
with rt.instance_claim(context, instance, limits):
|
||||
with rt.instance_claim(context, instance, limits) as inst_claim:
|
||||
# NOTE(russellb) It's important that this validation be done
|
||||
# *after* the resource tracker instance claim, as that is where
|
||||
# the host is set on the instance.
|
||||
@ -2100,6 +2101,7 @@ class ComputeManager(manager.Manager):
|
||||
block_device_mapping) as resources:
|
||||
instance.vm_state = vm_states.BUILDING
|
||||
instance.task_state = task_states.SPAWNING
|
||||
instance.numa_topology = inst_claim.claimed_numa_topology
|
||||
instance.save(expected_task_state=
|
||||
task_states.BLOCK_DEVICE_MAPPING)
|
||||
block_device_info = resources['block_device_info']
|
||||
|
@ -130,6 +130,7 @@ class ResourceTracker(object):
|
||||
overhead=overhead, limits=limits)
|
||||
|
||||
self._set_instance_host_and_node(context, instance_ref)
|
||||
instance_ref['numa_topology'] = claim.claimed_numa_topology
|
||||
|
||||
# Mark resources in-use and update stats
|
||||
self._update_usage_from_instance(context, self.compute_node,
|
||||
@ -596,9 +597,16 @@ class ResourceTracker(object):
|
||||
instance['system_metadata'])
|
||||
|
||||
if itype:
|
||||
host_topology = resources.get('numa_topology')
|
||||
if host_topology:
|
||||
host_topology = hardware.VirtNUMAHostTopology.from_json(
|
||||
host_topology)
|
||||
numa_topology = (
|
||||
hardware.VirtNUMAInstanceTopology.get_constraints(
|
||||
itype, image_meta))
|
||||
numa_topology = (
|
||||
hardware.VirtNUMAHostTopology.fit_instance_to_host(
|
||||
host_topology, numa_topology))
|
||||
usage = self._get_usage_dict(
|
||||
itype, numa_topology=numa_topology)
|
||||
if self.pci_tracker:
|
||||
|
@ -28,34 +28,28 @@ class NUMATopologyFilter(filters.BaseHostFilter):
|
||||
cpu_ratio = CONF.cpu_allocation_ratio
|
||||
request_spec = filter_properties.get('request_spec', {})
|
||||
instance = request_spec.get('instance_properties', {})
|
||||
instance_topology = hardware.instance_topology_from_instance(instance)
|
||||
requested_topology = hardware.instance_topology_from_instance(instance)
|
||||
host_topology, _fmt = hardware.host_topology_and_format_from_host(
|
||||
host_state)
|
||||
if instance_topology:
|
||||
if host_topology:
|
||||
if not hardware.VirtNUMAHostTopology.can_fit_instances(
|
||||
host_topology, [instance_topology]):
|
||||
return False
|
||||
|
||||
if requested_topology and host_topology:
|
||||
limit_cells = []
|
||||
usage_after_instance = (
|
||||
hardware.VirtNUMAHostTopology.usage_from_instances(
|
||||
host_topology, [instance_topology]))
|
||||
for cell in usage_after_instance.cells:
|
||||
for cell in host_topology.cells:
|
||||
max_cell_memory = int(cell.memory * ram_ratio)
|
||||
max_cell_cpu = len(cell.cpuset) * cpu_ratio
|
||||
if (cell.memory_usage > max_cell_memory or
|
||||
cell.cpu_usage > max_cell_cpu):
|
||||
return False
|
||||
limit_cells.append(
|
||||
hardware.VirtNUMATopologyCellLimit(
|
||||
limit_cells.append(hardware.VirtNUMATopologyCellLimit(
|
||||
cell.id, cell.cpuset, cell.memory,
|
||||
max_cell_cpu, max_cell_memory))
|
||||
host_state.limits['numa_topology'] = (
|
||||
hardware.VirtNUMALimitTopology(
|
||||
cells=limit_cells).to_json())
|
||||
limits = hardware.VirtNUMALimitTopology(cells=limit_cells)
|
||||
instance_topology = (
|
||||
hardware.VirtNUMAHostTopology.fit_instance_to_host(
|
||||
host_topology, requested_topology,
|
||||
limits_topology=limits))
|
||||
if not instance_topology:
|
||||
return False
|
||||
host_state.limits['numa_topology'] = limits.to_json()
|
||||
instance['numa_topology'] = instance_topology.to_json()
|
||||
return True
|
||||
else:
|
||||
elif requested_topology:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
@ -236,7 +236,7 @@ class ClaimTestCase(test.NoDBTestCase):
|
||||
def test_numa_topology_no_limit(self, mock_get):
|
||||
huge_instance = hardware.VirtNUMAInstanceTopology(
|
||||
cells=[hardware.VirtNUMATopologyCellInstance(
|
||||
1, set([1, 2, 3, 4, 5]), 2048)])
|
||||
1, set([1, 2]), 512)])
|
||||
self._claim(numa_topology=huge_instance)
|
||||
|
||||
def test_numa_topology_fails(self, mock_get):
|
||||
@ -256,7 +256,7 @@ class ClaimTestCase(test.NoDBTestCase):
|
||||
def test_numa_topology_passes(self, mock_get):
|
||||
huge_instance = hardware.VirtNUMAInstanceTopology(
|
||||
cells=[hardware.VirtNUMATopologyCellInstance(
|
||||
1, set([1, 2, 3, 4, 5]), 2048)])
|
||||
1, set([1, 2]), 512)])
|
||||
limit_topo = hardware.VirtNUMALimitTopology(
|
||||
cells=[hardware.VirtNUMATopologyCellLimit(
|
||||
1, [1, 2], 512, cpu_limit=5, memory_limit=4096),
|
||||
|
@ -868,8 +868,8 @@ class InstanceClaimTestCase(BaseTrackerTestCase):
|
||||
memory_mb = FAKE_VIRT_MEMORY_MB * 2
|
||||
root_gb = ephemeral_gb = FAKE_VIRT_LOCAL_GB
|
||||
vcpus = FAKE_VIRT_VCPUS * 2
|
||||
claim_topology = self._claim_topology(memory_mb)
|
||||
instance_topology = self._instance_topology(memory_mb)
|
||||
claim_topology = self._claim_topology(3)
|
||||
instance_topology = self._instance_topology(3)
|
||||
|
||||
limits = {'memory_mb': memory_mb + FAKE_VIRT_MEMORY_OVERHEAD,
|
||||
'disk_gb': root_gb * 2,
|
||||
|
@ -1137,14 +1137,12 @@ def instance_topology_from_instance(instance):
|
||||
# Remove when request_spec is a proper object itself!
|
||||
dict_cells = instance_numa_topology.get('cells')
|
||||
if dict_cells:
|
||||
cells = [objects.InstanceNUMACell(id=cell['id'],
|
||||
cpuset=set(cell['cpuset']),
|
||||
memory=cell['memory'],
|
||||
pagesize=cell.get(
|
||||
'pagesize'))
|
||||
cells = [VirtNUMATopologyCellInstance(cell['id'],
|
||||
set(cell['cpuset']),
|
||||
cell['memory'],
|
||||
cell.get('pagesize'))
|
||||
for cell in dict_cells]
|
||||
instance_numa_topology = (
|
||||
objects.InstanceNUMATopology(cells=cells))
|
||||
instance_numa_topology = VirtNUMAInstanceTopology(cells=cells)
|
||||
|
||||
return instance_numa_topology
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user