From f115ce5d747a8a9373c86257f413a261c3aeb39a Mon Sep 17 00:00:00 2001 From: Jay Pipes Date: Wed, 24 Feb 2016 18:54:56 -0500 Subject: [PATCH] Use new inventory schema in compute_node_get_all() In order to allow the scheduler to continue to function exactly the same as it currently does during the time period while we migrate the data and schema from the legacy layout to the resource-providers database schema, we need to ensure that the nova.db.sqlalchemy.api.compute_node_get_all() method returns all the existing fields that may store capacity and usage information in the compute_nodes table *but also* have the method return capacity and usage information from the new inventories and allocations tables. This patch adds some fairly complex SQLAlchemy-fu to the compute_node_get_all() method, joining the compute_nodes table to the new inventories and allocations tables for the three primary resource classes currently stored in the compute_nodes table. For RAM, vCPU and local disk resources, there are additional fields in the returned results that correspond to the capacity and usage information stored in the new inventories and allocations tables. For the RAM resource, the following fields are returned: * memory_mb <-- legacy compute_nodes.memory_mb field * memory_mb_used <-- legacy compute_nodes.memory_mb_used field * ram_allocation_ratio <-- legacy compute_nodes.ram_allocation_ratio field * inv_memory_mb <-- new inventories.total field value for the MEMORY_MB resource class for that compute node * inv_memory_mb_reserved <-- new inventories.reserved field value for the MEMORY_MB resource class * inv_memory_mb_used <-- SUM() of the allocations.used field values for the MEMORY_MB resource class for that compute node Similar fields for vCPU and local disk are returned for those resource classes. All of the inv_XXX fields are None if the inventory for the compute node has not been migrated from the compute_nodes table to the inventories table. 
In this way, the data migration on the ComputeNode object now has the ability to tell if a compute node has had its inventory and usage information migrated to the new resource-providers schema. Change-Id: Idc369310cd8efd8181126f6ae6272f5c835e0299 Partially-implements: compute-node-inventory --- nova/db/sqlalchemy/api.py | 156 +++++++++++++++++++++++++++++- nova/tests/unit/db/test_db_api.py | 119 ++++++++++++++++++++++- 2 files changed, 271 insertions(+), 4 deletions(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 1325d8b3f369..1e26f09cce26 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -38,6 +38,7 @@ from oslo_utils import timeutils from oslo_utils import uuidutils import six from six.moves import range +import sqlalchemy as sa from sqlalchemy import and_ from sqlalchemy.exc import NoSuchTableError from sqlalchemy import MetaData @@ -64,6 +65,7 @@ import nova.context from nova.db.sqlalchemy import models from nova import exception from nova.i18n import _, _LI, _LE, _LW +from nova.objects import fields from nova import quota from nova import safe_utils @@ -622,7 +624,159 @@ def compute_node_get_all_by_host(context, host): @main_context_manager.reader def compute_node_get_all(context): - return model_query(context, models.ComputeNode, read_deleted='no').all() + # NOTE(jaypipes): With the addition of the resource-providers database + # schema, inventory and allocation information for various resources + # on a compute node are to be migrated from the compute_nodes and + # instance_extra tables into the new inventories and allocations tables. + # During the time that this data migration is ongoing we need to allow + # the scheduler to essentially be blind to the underlying database + # schema changes. 
So, this query here returns three sets of resource + # attributes: + # - inv_memory_mb, inv_memory_mb_used, inv_memory_mb_reserved, + # inv_ram_allocation_ratio + # - inv_vcpus, inv_vcpus_used, inv_cpu_allocation_ratio + # - inv_local_gb, inv_local_gb_used, inv_local_gb_reserved, inv_disk_allocation_ratio + # These resource capacity/usage fields store the total and used values + # for those three resource classes that are currently stored in similar + # fields in the compute_nodes table (e.g. memory_mb and memory_mb_used) + # The code that runs the online data migrations will be able to tell if + # the compute node has had its inventory information moved to the + # inventories table by checking for a non-None field value for the + # inv_memory_mb, inv_vcpus, and inv_local_gb fields. + # + # The SQLAlchemy code below produces the following SQL statement + # exactly: + # + # SELECT + # cn.*, + # ram_inv.total as inv_memory_mb, + # ram_inv.reserved as inv_memory_mb_reserved, + # ram_inv.allocation_ratio as inv_ram_allocation_ratio, + # ram_usage.used as inv_memory_mb_used, + # cpu_inv.total as inv_vcpus, + # cpu_inv.allocation_ratio as inv_cpu_allocation_ratio, + # cpu_usage.used as inv_vcpus_used, + # disk_inv.total as inv_local_gb, disk_inv.reserved as inv_local_gb_reserved, + # disk_inv.allocation_ratio as inv_disk_allocation_ratio, + # disk_usage.used as inv_local_gb_used + # FROM compute_nodes AS cn + # LEFT OUTER JOIN resource_providers AS rp + # ON cn.uuid = rp.uuid + # LEFT OUTER JOIN inventories AS ram_inv + # ON rp.id = ram_inv.resource_provider_id + # AND ram_inv.resource_class_id = :RAM_MB + # LEFT OUTER JOIN ( + # SELECT resource_provider_id, SUM(used) as used + # FROM allocations + # WHERE resource_class_id = :RAM_MB + # GROUP BY resource_provider_id + # ) AS ram_usage + # ON ram_inv.resource_provider_id = ram_usage.resource_provider_id + # LEFT OUTER JOIN inventories AS cpu_inv + # ON rp.id = cpu_inv.resource_provider_id + # AND cpu_inv.resource_class_id = :VCPUS + # LEFT OUTER JOIN ( + # SELECT 
resource_provider_id, SUM(used) as used + # FROM allocations + # WHERE resource_class_id = :VCPUS + # GROUP BY resource_provider_id + # ) AS cpu_usage + # ON cpu_inv.resource_provider_id = cpu_usage.resource_provider_id + # LEFT OUTER JOIN inventories AS disk_inv + # ON rp.id = disk_inv.resource_provider_id + # AND disk_inv.resource_class_id = :DISK_GB + # LEFT OUTER JOIN ( + # SELECT resource_provider_id, SUM(used) as used + # FROM allocations + # WHERE resource_class_id = :DISK_GB + # GROUP BY resource_provider_id + # ) AS disk_usage + # ON disk_inv.resource_provider_id = disk_usage.resource_provider_id + # WHERE cn.deleted = 0; + RAM_MB = fields.ResourceClass.index(fields.ResourceClass.MEMORY_MB) + VCPU = fields.ResourceClass.index(fields.ResourceClass.VCPU) + DISK_GB = fields.ResourceClass.index(fields.ResourceClass.DISK_GB) + + cn_tbl = sa.alias(models.ComputeNode.__table__, name='cn') + rp_tbl = sa.alias(models.ResourceProvider.__table__, name='rp') + inv_tbl = models.Inventory.__table__ + alloc_tbl = models.Allocation.__table__ + ram_inv = sa.alias(inv_tbl, name='ram_inv') + cpu_inv = sa.alias(inv_tbl, name='cpu_inv') + disk_inv = sa.alias(inv_tbl, name='disk_inv') + + ram_usage = sa.select([alloc_tbl.c.resource_provider_id, + sql.func.sum(alloc_tbl.c.used).label('used')]) + ram_usage = ram_usage.where(alloc_tbl.c.resource_class_id == RAM_MB) + ram_usage = ram_usage.group_by(alloc_tbl.c.resource_provider_id) + ram_usage = sa.alias(ram_usage, name='ram_usage') + + cpu_usage = sa.select([alloc_tbl.c.resource_provider_id, + sql.func.sum(alloc_tbl.c.used).label('used')]) + cpu_usage = cpu_usage.where(alloc_tbl.c.resource_class_id == VCPU) + cpu_usage = cpu_usage.group_by(alloc_tbl.c.resource_provider_id) + cpu_usage = sa.alias(cpu_usage, name='cpu_usage') + + disk_usage = sa.select([alloc_tbl.c.resource_provider_id, + sql.func.sum(alloc_tbl.c.used).label('used')]) + disk_usage = disk_usage.where(alloc_tbl.c.resource_class_id == DISK_GB) + disk_usage = 
disk_usage.group_by(alloc_tbl.c.resource_provider_id) + disk_usage = sa.alias(disk_usage, name='disk_usage') + + cn_rp_join = sql.outerjoin( + cn_tbl, rp_tbl, + cn_tbl.c.uuid == rp_tbl.c.uuid) + ram_inv_join = sql.outerjoin( + cn_rp_join, ram_inv, + sql.and_(rp_tbl.c.id == ram_inv.c.resource_provider_id, + ram_inv.c.resource_class_id == RAM_MB)) + ram_join = sql.outerjoin( + ram_inv_join, ram_usage, + ram_inv.c.resource_provider_id == ram_usage.c.resource_provider_id) + cpu_inv_join = sql.outerjoin( + ram_join, cpu_inv, + sql.and_(rp_tbl.c.id == cpu_inv.c.resource_provider_id, + cpu_inv.c.resource_class_id == VCPU)) + cpu_join = sql.outerjoin( + cpu_inv_join, cpu_usage, + cpu_inv.c.resource_provider_id == cpu_usage.c.resource_provider_id) + disk_inv_join = sql.outerjoin( + cpu_join, disk_inv, + sql.and_(rp_tbl.c.id == disk_inv.c.resource_provider_id, + disk_inv.c.resource_class_id == DISK_GB)) + disk_join = sql.outerjoin( + disk_inv_join, disk_usage, + disk_inv.c.resource_provider_id == disk_usage.c.resource_provider_id) + # TODO(jaypipes): Remove all capacity and usage fields from this method + # entirely and deal with allocations and inventory information in a + # tabular fashion instead of a columnar fashion like the legacy + # compute_nodes table schema does. 
+ inv_cols = [ + ram_inv.c.total.label('inv_memory_mb'), + ram_inv.c.reserved.label('inv_memory_mb_reserved'), + ram_inv.c.allocation_ratio.label('inv_ram_allocation_ratio'), + ram_usage.c.used.label('inv_memory_mb_used'), + cpu_inv.c.total.label('inv_vcpus'), + cpu_inv.c.allocation_ratio.label('inv_cpu_allocation_ratio'), + cpu_usage.c.used.label('inv_vcpus_used'), + disk_inv.c.total.label('inv_local_gb'), + disk_inv.c.reserved.label('inv_local_gb_reserved'), + disk_inv.c.allocation_ratio.label('inv_disk_allocation_ratio'), + disk_usage.c.used.label('inv_local_gb_used'), + ] + cols_in_output = list(cn_tbl.c) + cols_in_output.extend(inv_cols) + + select = sa.select(cols_in_output).select_from(disk_join) + select = select.where(cn_tbl.c.deleted == 0) + engine = get_engine(context) + conn = engine.connect() + + results = conn.execute(select).fetchall() + # Callers expect dict-like objects, not SQLAlchemy RowProxy objects... + results = [dict(r) for r in results] + conn.close() + return results @main_context_manager.reader diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 9d60702a22b7..1e789fa5c322 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -7338,6 +7338,22 @@ class S3ImageTestCase(test.TestCase): class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin): _ignored_keys = ['id', 'deleted', 'deleted_at', 'created_at', 'updated_at'] + # TODO(jaypipes): Remove once the compute node inventory migration has + # been completed and the scheduler uses the inventories and allocations + # tables directly. 
+ _ignored_temp_resource_providers_keys = [ + 'inv_memory_mb', + 'inv_memory_mb_reserved', + 'inv_ram_allocation_ratio', + 'inv_memory_mb_used', + 'inv_vcpus', + 'inv_cpu_allocation_ratio', + 'inv_vcpus_used', + 'inv_local_gb', + 'inv_local_gb_reserved', + 'inv_disk_allocation_ratio', + 'inv_local_gb_used', + ] def setUp(self): super(ComputeNodeTestCase, self).setUp() @@ -7385,11 +7401,107 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin): self.assertEqual(1, len(nodes)) node = nodes[0] self._assertEqualObjects(self.compute_node_dict, node, - ignored_keys=self._ignored_keys + - ['stats', 'service']) + ignored_keys=self._ignored_keys + + self._ignored_temp_resource_providers_keys + + ['stats', 'service']) new_stats = jsonutils.loads(node['stats']) self.assertEqual(self.stats, new_stats) + def test_compute_node_get_all_provider_schema(self): + # We here test that compute nodes that have inventory and allocation + # entries under the new resource-providers schema return non-None + # values for the inv_* fields in the returned list of dicts from + # compute_node_get_all(). 
+ nodes = db.compute_node_get_all(self.ctxt) + self.assertEqual(1, len(nodes)) + node = nodes[0] + self.assertIsNone(node['inv_memory_mb']) + self.assertIsNone(node['inv_memory_mb_used']) + + RAM_MB = fields.ResourceClass.index(fields.ResourceClass.MEMORY_MB) + VCPU = fields.ResourceClass.index(fields.ResourceClass.VCPU) + DISK_GB = fields.ResourceClass.index(fields.ResourceClass.DISK_GB) + + @sqlalchemy_api.main_context_manager.writer + def create_resource_provider(context): + rp = models.ResourceProvider() + rp.uuid = node['uuid'] + rp.save(context.session) + return rp.id + + @sqlalchemy_api.main_context_manager.writer + def create_inventory(context, provider_id, resource_class, total): + inv = models.Inventory() + inv.resource_provider_id = provider_id + inv.resource_class_id = resource_class + inv.total = total + inv.reserved = 0 + inv.allocation_ratio = 1.0 + inv.min_unit = 1 + inv.max_unit = 1 + inv.step_size = 1 + inv.save(context.session) + + @sqlalchemy_api.main_context_manager.writer + def create_allocation(context, provider_id, resource_class, used): + alloc = models.Allocation() + alloc.resource_provider_id = provider_id + alloc.resource_class_id = resource_class + alloc.consumer_id = 'xyz' + alloc.used = used + alloc.save(context.session) + + # Now add an inventory record for memory and check there is a non-None + # value for the inv_memory_mb field. Don't add an allocation record + # for RAM_MB yet, to ensure inv_memory_mb_used remains None. + rp_id = create_resource_provider(self.ctxt) + create_inventory(self.ctxt, rp_id, RAM_MB, 4096) + nodes = db.compute_node_get_all(self.ctxt) + self.assertEqual(1, len(nodes)) + node = nodes[0] + self.assertEqual(4096, node['inv_memory_mb']) + self.assertIsNone(node['inv_memory_mb_used']) + + # Now add an allocation record for an instance consuming some memory + # and check there is a non-None value for the inv_memory_mb_used field. 
+ create_allocation(self.ctxt, rp_id, RAM_MB, 64) + nodes = db.compute_node_get_all(self.ctxt) + self.assertEqual(1, len(nodes)) + node = nodes[0] + self.assertEqual(4096, node['inv_memory_mb']) + self.assertEqual(64, node['inv_memory_mb_used']) + + # Because of the complex join conditions, it's best to also test the + # other two resource classes and ensure that the joins are correct. + self.assertIsNone(node['inv_vcpus']) + self.assertIsNone(node['inv_vcpus_used']) + self.assertIsNone(node['inv_local_gb']) + self.assertIsNone(node['inv_local_gb_used']) + + create_inventory(self.ctxt, rp_id, VCPU, 16) + create_allocation(self.ctxt, rp_id, VCPU, 2) + nodes = db.compute_node_get_all(self.ctxt) + self.assertEqual(1, len(nodes)) + node = nodes[0] + self.assertEqual(16, node['inv_vcpus']) + self.assertEqual(2, node['inv_vcpus_used']) + # Check to make sure the other resources stayed the same... + self.assertEqual(4096, node['inv_memory_mb']) + self.assertEqual(64, node['inv_memory_mb_used']) + + create_inventory(self.ctxt, rp_id, DISK_GB, 100) + create_allocation(self.ctxt, rp_id, DISK_GB, 20) + nodes = db.compute_node_get_all(self.ctxt) + self.assertEqual(1, len(nodes)) + node = nodes[0] + self.assertEqual(100, node['inv_local_gb']) + self.assertEqual(20, node['inv_local_gb_used']) + # Check to make sure the other resources stayed the same... 
+ self.assertEqual(4096, node['inv_memory_mb']) + self.assertEqual(64, node['inv_memory_mb_used']) + self.assertEqual(16, node['inv_vcpus']) + self.assertEqual(2, node['inv_vcpus_used']) + def test_compute_node_get_all_deleted_compute_node(self): # Create a service and compute node and ensure we can find its stats; # delete the service and compute node when done and loop again @@ -7448,7 +7560,8 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin): key=lambda n: n['hypervisor_hostname']) self._assertEqualListsOfObjects(expected, result, - ignored_keys=['stats']) + ignored_keys=self._ignored_temp_resource_providers_keys + + ['stats']) def test_compute_node_get_all_by_host_with_distinct_hosts(self): # Create another service with another node