Use new inventory schema in compute_node_get_all()

In order to allow the scheduler to continue to function exactly the same
as it currently does during the time period while we migrate the data
and schema from the legacy layout to the resource-providers database
schema, we need to ensure that the
nova.db.sqlalchemy.api.compute_node_get_all() method returns all the
existing fields that may store capacity and usage information in the
compute_nodes table *but also* have the method return capacity and usage
information from the new inventories and allocations tables.

This patch adds some fairly complex SQLAlchemy-fu to the
compute_node_get_all() method, joining the compute_nodes table to the
new inventories and allocations tables for the three primary resource
classes currently stored in the compute_nodes table. For RAM, vCPU and
local disk resources, there are additional fields in the returned
results that correspond to the capacity and usage information stored in
the new inventories and allocations table.

For the RAM resource, the following fields are returned:

 * memory_mb <-- legacy compute_nodes.memory_mb field
 * memory_mb_used <-- legacy compute_nodes.memory_mb_used field
 * ram_allocation_ratio <-- legacy compute_nodes.ram_allocation_ratio field
 * inv_memory_mb <-- new inventories.total field value for the MEMORY_MB
   resource class for that compute node
 * inv_memory_mb_reserved <-- new inventories.reserved field value for
   the MEMORY_MB resource class
 * inv_memory_mb_used <-- SUM() of the allocations.used field values for
   the MEMORY_MB resource class for that compute node

Similar fields for vCPU and local disk are returned for those resource
classes.

All of the inv_XXX fields are None if the inventory for the compute node
has not been migrated from the compute_nodes table to the inventories
table. In this way, the data migration on the ComputeNode object now has
the ability to tell if a compute node has had its inventory and usage
information migrated to the new resource-providers schema.

Change-Id: Idc369310cd8efd8181126f6ae6272f5c835e0299
Partially-implements: compute-node-inventory
This commit is contained in:
Jay Pipes
2016-02-24 18:54:56 -05:00
parent 3b2adde944
commit f115ce5d74
2 changed files with 271 additions and 4 deletions

View File

@@ -38,6 +38,7 @@ from oslo_utils import timeutils
from oslo_utils import uuidutils
import six
from six.moves import range
import sqlalchemy as sa
from sqlalchemy import and_
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy import MetaData
@@ -64,6 +65,7 @@ import nova.context
from nova.db.sqlalchemy import models
from nova import exception
from nova.i18n import _, _LI, _LE, _LW
from nova.objects import fields
from nova import quota
from nova import safe_utils
@@ -622,7 +624,159 @@ def compute_node_get_all_by_host(context, host):
@main_context_manager.reader
def compute_node_get_all(context):
    """Return all non-deleted compute nodes as dict-like results.

    Each result contains every column of the compute_nodes table plus, for
    the RAM, VCPU and local disk resource classes, capacity and usage
    fields (the inv_* keys) drawn from the new resource-providers schema
    (the inventories and allocations tables). The inv_* values are None
    for any compute node whose inventory has not yet been migrated out of
    the compute_nodes table.
    """
    # NOTE(jaypipes): With the addition of the resource-providers database
    # schema, inventory and allocation information for various resources
    # on a compute node are to be migrated from the compute_nodes and
    # instance_extra tables into the new inventories and allocations
    # tables. During the time that this data migration is ongoing we need
    # to allow the scheduler to essentially be blind to the underlying
    # database schema changes. So, this query here returns three sets of
    # resource attributes:
    #  - inv_memory_mb, inv_memory_mb_used, inv_memory_mb_reserved,
    #    inv_ram_allocation_ratio
    #  - inv_vcpus, inv_vcpus_used, inv_cpu_allocation_ratio
    #  - inv_local_gb, inv_local_gb_used, inv_local_gb_reserved,
    #    inv_disk_allocation_ratio
    # These resource capacity/usage fields store the total, reserved and
    # used values for the three resource classes that are currently stored
    # in similar fields in the compute_nodes table (e.g. memory_mb and
    # memory_mb_used). The code that runs the online data migrations will
    # be able to tell if the compute node has had its inventory
    # information moved to the inventories table by checking for a
    # non-None field value for the inv_memory_mb, inv_vcpus, and
    # inv_local_gb fields.
    #
    # The SQLAlchemy code below produces the following SQL statement
    # exactly:
    #
    # SELECT
    #   cn.*,
    #   ram_inv.total as inv_memory_mb,
    #   ram_inv.reserved as inv_memory_mb_reserved,
    #   ram_inv.allocation_ratio as inv_ram_allocation_ratio,
    #   ram_usage.used as inv_memory_mb_used,
    #   cpu_inv.total as inv_vcpus,
    #   cpu_inv.allocation_ratio as inv_cpu_allocation_ratio,
    #   cpu_usage.used as inv_vcpus_used,
    #   disk_inv.total as inv_local_gb,
    #   disk_inv.reserved as inv_local_gb_reserved,
    #   disk_inv.allocation_ratio as inv_disk_allocation_ratio,
    #   disk_usage.used as inv_local_gb_used
    # FROM compute_nodes AS cn
    #  LEFT OUTER JOIN resource_providers AS rp
    #   ON cn.uuid = rp.uuid
    #  LEFT OUTER JOIN inventories AS ram_inv
    #   ON rp.id = ram_inv.resource_provider_id
    #   AND ram_inv.resource_class_id = :RAM_MB
    #  LEFT OUTER JOIN (
    #   SELECT resource_provider_id, SUM(used) as used
    #   FROM allocations
    #   WHERE resource_class_id = :RAM_MB
    #   GROUP BY resource_provider_id
    #  ) AS ram_usage
    #   ON ram_inv.resource_provider_id = ram_usage.resource_provider_id
    #  LEFT OUTER JOIN inventories AS cpu_inv
    #   ON rp.id = cpu_inv.resource_provider_id
    #   AND cpu_inv.resource_class_id = :VCPUS
    #  LEFT OUTER JOIN (
    #   SELECT resource_provider_id, SUM(used) as used
    #   FROM allocations
    #   WHERE resource_class_id = :VCPUS
    #   GROUP BY resource_provider_id
    #  ) AS cpu_usage
    #   ON cpu_inv.resource_provider_id = cpu_usage.resource_provider_id
    #  LEFT OUTER JOIN inventories AS disk_inv
    #   ON rp.id = disk_inv.resource_provider_id
    #   AND disk_inv.resource_class_id = :DISK_GB
    #  LEFT OUTER JOIN (
    #   SELECT resource_provider_id, SUM(used) as used
    #   FROM allocations
    #   WHERE resource_class_id = :DISK_GB
    #   GROUP BY resource_provider_id
    #  ) AS disk_usage
    #   ON disk_inv.resource_provider_id = disk_usage.resource_provider_id
    # WHERE cn.deleted = 0;
    RAM_MB = fields.ResourceClass.index(fields.ResourceClass.MEMORY_MB)
    VCPU = fields.ResourceClass.index(fields.ResourceClass.VCPU)
    DISK_GB = fields.ResourceClass.index(fields.ResourceClass.DISK_GB)
    cn_tbl = sa.alias(models.ComputeNode.__table__, name='cn')
    rp_tbl = sa.alias(models.ResourceProvider.__table__, name='rp')
    inv_tbl = models.Inventory.__table__
    alloc_tbl = models.Allocation.__table__
    ram_inv = sa.alias(inv_tbl, name='ram_inv')
    cpu_inv = sa.alias(inv_tbl, name='cpu_inv')
    disk_inv = sa.alias(inv_tbl, name='disk_inv')

    def _usage_subquery(resource_class_id, name):
        # Aliased subquery summing allocations.used per resource provider
        # for a single resource class (the derived ..._usage tables in the
        # SQL above).
        usage = sa.select([alloc_tbl.c.resource_provider_id,
                           sql.func.sum(alloc_tbl.c.used).label('used')])
        usage = usage.where(
            alloc_tbl.c.resource_class_id == resource_class_id)
        usage = usage.group_by(alloc_tbl.c.resource_provider_id)
        return sa.alias(usage, name=name)

    ram_usage = _usage_subquery(RAM_MB, 'ram_usage')
    cpu_usage = _usage_subquery(VCPU, 'cpu_usage')
    disk_usage = _usage_subquery(DISK_GB, 'disk_usage')
    # Chain of LEFT OUTER JOINs mirroring the SQL comment above; every
    # join is an outer join so nodes without migrated inventory still
    # appear, with NULL (None) inv_* values.
    cn_rp_join = sql.outerjoin(
        cn_tbl, rp_tbl,
        cn_tbl.c.uuid == rp_tbl.c.uuid)
    ram_inv_join = sql.outerjoin(
        cn_rp_join, ram_inv,
        sql.and_(rp_tbl.c.id == ram_inv.c.resource_provider_id,
                 ram_inv.c.resource_class_id == RAM_MB))
    ram_join = sql.outerjoin(
        ram_inv_join, ram_usage,
        ram_inv.c.resource_provider_id == ram_usage.c.resource_provider_id)
    cpu_inv_join = sql.outerjoin(
        ram_join, cpu_inv,
        sql.and_(rp_tbl.c.id == cpu_inv.c.resource_provider_id,
                 cpu_inv.c.resource_class_id == VCPU))
    cpu_join = sql.outerjoin(
        cpu_inv_join, cpu_usage,
        cpu_inv.c.resource_provider_id == cpu_usage.c.resource_provider_id)
    disk_inv_join = sql.outerjoin(
        cpu_join, disk_inv,
        sql.and_(rp_tbl.c.id == disk_inv.c.resource_provider_id,
                 disk_inv.c.resource_class_id == DISK_GB))
    disk_join = sql.outerjoin(
        disk_inv_join, disk_usage,
        disk_inv.c.resource_provider_id ==
        disk_usage.c.resource_provider_id)
    # TODO(jaypipes): Remove all capacity and usage fields from this method
    # entirely and deal with allocations and inventory information in a
    # tabular fashion instead of a columnar fashion like the legacy
    # compute_nodes table schema does.
    inv_cols = [
        ram_inv.c.total.label('inv_memory_mb'),
        ram_inv.c.reserved.label('inv_memory_mb_reserved'),
        ram_inv.c.allocation_ratio.label('inv_ram_allocation_ratio'),
        ram_usage.c.used.label('inv_memory_mb_used'),
        cpu_inv.c.total.label('inv_vcpus'),
        cpu_inv.c.allocation_ratio.label('inv_cpu_allocation_ratio'),
        cpu_usage.c.used.label('inv_vcpus_used'),
        disk_inv.c.total.label('inv_local_gb'),
        disk_inv.c.reserved.label('inv_local_gb_reserved'),
        disk_inv.c.allocation_ratio.label('inv_disk_allocation_ratio'),
        disk_usage.c.used.label('inv_local_gb_used'),
    ]
    cols_in_output = list(cn_tbl.c)
    cols_in_output.extend(inv_cols)
    select = sa.select(cols_in_output).select_from(disk_join)
    select = select.where(cn_tbl.c.deleted == 0)
    engine = get_engine(context)
    # Use the connection as a context manager so it is returned to the
    # pool even if executing the query raises (the previous explicit
    # conn.close() was skipped on error).
    with engine.connect() as conn:
        results = conn.execute(select).fetchall()
    # Callers expect dict-like objects, not SQLAlchemy RowProxy objects...
    return [dict(r) for r in results]
@main_context_manager.reader

View File

@@ -7338,6 +7338,22 @@ class S3ImageTestCase(test.TestCase):
class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin):
    # Columns whose values always differ between the fixture dict and the
    # row fetched back from the DB, so every comparison ignores them.
    _ignored_keys = ['id', 'deleted', 'deleted_at', 'created_at', 'updated_at']
    # TODO(jaypipes): Remove once the compute node inventory migration has
    # been completed and the scheduler uses the inventories and allocations
    # tables directly.
    # NOTE(review): these inv_* keys are the temporary resource-providers
    # fields appended to each result by compute_node_get_all(); they are
    # ignored when comparing against legacy compute_nodes fixtures, which
    # do not contain them.
    _ignored_temp_resource_providers_keys = [
        'inv_memory_mb',
        'inv_memory_mb_reserved',
        'inv_ram_allocation_ratio',
        'inv_memory_mb_used',
        'inv_vcpus',
        'inv_cpu_allocation_ratio',
        'inv_vcpus_used',
        'inv_local_gb',
        'inv_local_gb_reserved',
        'inv_disk_allocation_ratio',
        'inv_local_gb_used',
    ]
def setUp(self):
super(ComputeNodeTestCase, self).setUp()
@@ -7385,11 +7401,107 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin):
self.assertEqual(1, len(nodes))
node = nodes[0]
self._assertEqualObjects(self.compute_node_dict, node,
ignored_keys=self._ignored_keys +
['stats', 'service'])
ignored_keys=self._ignored_keys +
self._ignored_temp_resource_providers_keys +
['stats', 'service'])
new_stats = jsonutils.loads(node['stats'])
self.assertEqual(self.stats, new_stats)
    def test_compute_node_get_all_provider_schema(self):
        """Verify compute_node_get_all() surfaces resource-providers data.

        Compute nodes that have inventory and allocation entries under the
        new resource-providers schema must return non-None values for the
        inv_* fields in the returned list of dicts from
        compute_node_get_all(); unmigrated nodes must return None.
        """
        # Before any migration, every inv_* field must be None.
        nodes = db.compute_node_get_all(self.ctxt)
        self.assertEqual(1, len(nodes))
        node = nodes[0]
        self.assertIsNone(node['inv_memory_mb'])
        self.assertIsNone(node['inv_memory_mb_used'])

        RAM_MB = fields.ResourceClass.index(fields.ResourceClass.MEMORY_MB)
        VCPU = fields.ResourceClass.index(fields.ResourceClass.VCPU)
        DISK_GB = fields.ResourceClass.index(fields.ResourceClass.DISK_GB)

        @sqlalchemy_api.main_context_manager.writer
        def create_resource_provider(context):
            # Create a resource_providers row sharing the compute node's
            # UUID (the join key used by compute_node_get_all) and return
            # its id for use as a foreign key below.
            rp = models.ResourceProvider()
            rp.uuid = node['uuid']
            rp.save(context.session)
            return rp.id

        @sqlalchemy_api.main_context_manager.writer
        def create_inventory(context, provider_id, resource_class, total):
            # Add an inventories row (capacity) for one resource class on
            # the given provider.
            inv = models.Inventory()
            inv.resource_provider_id = provider_id
            inv.resource_class_id = resource_class
            inv.total = total
            inv.reserved = 0
            inv.allocation_ratio = 1.0
            inv.min_unit = 1
            inv.max_unit = 1
            inv.step_size = 1
            inv.save(context.session)

        @sqlalchemy_api.main_context_manager.writer
        def create_allocation(context, provider_id, resource_class, used):
            # Add an allocations row (usage) for one resource class on the
            # given provider.
            alloc = models.Allocation()
            alloc.resource_provider_id = provider_id
            alloc.resource_class_id = resource_class
            alloc.consumer_id = 'xyz'
            alloc.used = used
            alloc.save(context.session)

        # Now add an inventory record for memory and check there is a
        # non-None value for the inv_memory_mb field. Don't yet add an
        # allocation record for RAM_MB yet so ensure inv_memory_mb_used
        # remains None.
        rp_id = create_resource_provider(self.ctxt)
        create_inventory(self.ctxt, rp_id, RAM_MB, 4096)
        nodes = db.compute_node_get_all(self.ctxt)
        self.assertEqual(1, len(nodes))
        node = nodes[0]
        self.assertEqual(4096, node['inv_memory_mb'])
        self.assertIsNone(node['inv_memory_mb_used'])

        # Now add an allocation record for an instance consuming some
        # memory and check there is a non-None value for the
        # inv_memory_mb_used field.
        create_allocation(self.ctxt, rp_id, RAM_MB, 64)
        nodes = db.compute_node_get_all(self.ctxt)
        self.assertEqual(1, len(nodes))
        node = nodes[0]
        self.assertEqual(4096, node['inv_memory_mb'])
        self.assertEqual(64, node['inv_memory_mb_used'])

        # Because of the complex join conditions, it's best to also test
        # the other two resource classes and ensure that the joins are
        # correct.
        self.assertIsNone(node['inv_vcpus'])
        self.assertIsNone(node['inv_vcpus_used'])
        self.assertIsNone(node['inv_local_gb'])
        self.assertIsNone(node['inv_local_gb_used'])

        create_inventory(self.ctxt, rp_id, VCPU, 16)
        create_allocation(self.ctxt, rp_id, VCPU, 2)
        nodes = db.compute_node_get_all(self.ctxt)
        self.assertEqual(1, len(nodes))
        node = nodes[0]
        self.assertEqual(16, node['inv_vcpus'])
        self.assertEqual(2, node['inv_vcpus_used'])
        # Check to make sure the other resources stayed the same...
        self.assertEqual(4096, node['inv_memory_mb'])
        self.assertEqual(64, node['inv_memory_mb_used'])

        create_inventory(self.ctxt, rp_id, DISK_GB, 100)
        create_allocation(self.ctxt, rp_id, DISK_GB, 20)
        nodes = db.compute_node_get_all(self.ctxt)
        self.assertEqual(1, len(nodes))
        node = nodes[0]
        self.assertEqual(100, node['inv_local_gb'])
        self.assertEqual(20, node['inv_local_gb_used'])
        # Check to make sure the other resources stayed the same...
        self.assertEqual(4096, node['inv_memory_mb'])
        self.assertEqual(64, node['inv_memory_mb_used'])
        self.assertEqual(16, node['inv_vcpus'])
        self.assertEqual(2, node['inv_vcpus_used'])
def test_compute_node_get_all_deleted_compute_node(self):
# Create a service and compute node and ensure we can find its stats;
# delete the service and compute node when done and loop again
@@ -7448,7 +7560,8 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin):
key=lambda n: n['hypervisor_hostname'])
self._assertEqualListsOfObjects(expected, result,
ignored_keys=['stats'])
ignored_keys=self._ignored_temp_resource_providers_keys +
['stats'])
def test_compute_node_get_all_by_host_with_distinct_hosts(self):
# Create another service with another node