Add nova-status check for ironic flavor migration

In Pike we started requiring that ironic instances have their
embedded flavor migrated to track the ironic node custom
resource class. This can be done either via the normal running
of the nova-compute service and ironic driver or via the
'nova-manage db ironic_flavor_migration' command.

This change adds a nova-status check to see if there are any
unmigrated ironic instances across all non-cell0 cells, and
is based mostly on the same logic within the nova-manage command
except it's multi-cell aware and doesn't use the objects.

Change-Id: Ifd22325e849db2353b1b1eedfe998e3d6a79591c
This commit is contained in:
Matt Riedemann 2017-12-12 18:39:17 -05:00
parent 9ea731a652
commit 7eb6703521
4 changed files with 263 additions and 1 deletions

View File

@ -107,6 +107,8 @@ Upgrade
**18.0.0 (Rocky)**
* Checks for the Placement API are modified to require version 1.21.
* Checks that ironic instances have had their embedded flavors migrated to
use custom resource classes.
See Also
========

View File

@ -18,6 +18,7 @@ CLI interface for nova status commands.
from __future__ import print_function
import collections
# enum comes from the enum34 package if python < 3.4, else it's stdlib
import enum
import functools
@ -27,10 +28,11 @@ import traceback
from keystoneauth1 import exceptions as ks_exc
from oslo_config import cfg
from oslo_serialization import jsonutils
import pkg_resources
import prettytable
from sqlalchemy import func as sqlfunc
from sqlalchemy import MetaData, Table, select
from sqlalchemy import MetaData, Table, and_, select
from nova.cmd import common as cmd_common
import nova.conf
@ -344,6 +346,100 @@ class UpgradeCommands(object):
# We have RPs >= CNs which is what we want to see.
return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
@staticmethod
def _is_ironic_instance_migrated(extras, inst):
extra = (extras.select().where(extras.c.instance_uuid == inst['uuid']
).execute().first())
# Pull the flavor and deserialize it. Note that the flavor info for an
# instance is a dict keyed by "cur", "old", "new" and we want the
# current flavor.
flavor = jsonutils.loads(extra['flavor'])['cur']['nova_object.data']
# Do we have a custom resource flavor extra spec?
specs = flavor['extra_specs'] if 'extra_specs' in flavor else {}
for spec_key in specs:
if spec_key.startswith('resources:CUSTOM_'):
# We found a match so this instance is good.
return True
return False
def _check_ironic_flavor_migration(self):
"""In Pike, ironic instances and flavors need to be migrated to use
custom resource classes. In ironic, the node.resource_class should be
set to some custom resource class value which should match a
"resources:<custom resource class name>" flavor extra spec on baremetal
flavors. Existing ironic instances will have their embedded
instance.flavor.extra_specs migrated to use the matching ironic
node.resource_class value in the nova-compute service, or they can
be forcefully migrated using "nova-manage db ironic_flavor_migration".
In this check, we look for all ironic compute nodes in all non-cell0
cells, and from those ironic compute nodes, we look for an instance
that has a "resources:CUSTOM_*" key in it's embedded flavor extra
specs.
"""
cell_mappings = self._get_non_cell0_mappings()
ctxt = nova_context.get_admin_context()
# dict of cell identifier (name or uuid) to number of unmigrated
# instances
unmigrated_instance_count_by_cell = collections.defaultdict(int)
for cell_mapping in cell_mappings:
with nova_context.target_cell(ctxt, cell_mapping) as cctxt:
# Get the (non-deleted) ironic compute nodes in this cell.
meta = MetaData(bind=db_session.get_engine(context=cctxt))
compute_nodes = Table('compute_nodes', meta, autoload=True)
ironic_nodes = (
compute_nodes.select().where(and_(
compute_nodes.c.hypervisor_type == 'ironic',
compute_nodes.c.deleted == 0
)).execute().fetchall())
if ironic_nodes:
# We have ironic nodes in this cell, let's iterate over
# them looking for instances.
instances = Table('instances', meta, autoload=True)
extras = Table('instance_extra', meta, autoload=True)
for node in ironic_nodes:
nodename = node['hypervisor_hostname']
# Get any (non-deleted) instances for this node.
ironic_instances = (
instances.select().where(and_(
instances.c.node == nodename,
instances.c.deleted == 0
)).execute().fetchall())
# Get the instance_extras for each instance so we can
# find the flavors.
for inst in ironic_instances:
if not self._is_ironic_instance_migrated(
extras, inst):
# We didn't find the extra spec key for this
# instance so increment the number of
# unmigrated instances in this cell.
unmigrated_instance_count_by_cell[
cell_mapping['uuid']] += 1
if not cell_mappings:
# There are no non-cell0 mappings so we can't determine this, just
# return a warning. The cellsv2 check would have already failed
# on this.
msg = (_('Unable to determine ironic flavor migration without '
'cell mappings.'))
return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg)
if unmigrated_instance_count_by_cell:
# There are unmigrated ironic instances, so we need to fail.
msg = (_('There are (cell=x) number of unmigrated instances in '
'each cell: %s. Run \'nova-manage db '
'ironic_flavor_migration\' on each cell.') %
' '.join('(%s=%s)' % (
cell_id, unmigrated_instance_count_by_cell[cell_id])
for cell_id in
sorted(unmigrated_instance_count_by_cell.keys())))
return UpgradeCheckResult(UpgradeCheckCode.FAILURE, msg)
# Either there were no ironic compute nodes or all instances for
# those nodes are already migrated, so there is nothing to do.
return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
# The format of the check functions is to return an UpgradeCheckResult
# object with the appropriate UpgradeCheckCode and details set. If the
# check hits warnings or failures then those should be stored in the
@ -358,6 +454,8 @@ class UpgradeCommands(object):
(_('Placement API'), _check_placement),
# Added in Ocata
(_('Resource Providers'), _check_resource_providers),
# Added in Rocky (but also useful going back to Pike)
(_('Ironic Flavor Migration'), _check_ironic_flavor_migration)
)
def _get_details(self, upgrade_check_result):

View File

@ -657,3 +657,159 @@ class TestUpgradeCheckResourceProviders(test.NoDBTestCase):
result = self.cmd._check_resource_providers()
self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code)
self.assertIsNone(result.details)
class TestUpgradeCheckIronicFlavorMigration(test.NoDBTestCase):
"""Tests for the nova-status upgrade check on ironic flavor migration."""
# We'll setup the database ourselves because we need to use cells fixtures
# for multiple cell mappings.
USES_DB_SELF = True
# This will create three cell mappings: cell0, cell1 (default) and cell2
NUMBER_OF_CELLS = 2
def setUp(self):
super(TestUpgradeCheckIronicFlavorMigration, self).setUp()
self.output = StringIO()
self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))
# We always need the API DB to be setup.
self.useFixture(nova_fixtures.Database(database='api'))
self.cmd = status.UpgradeCommands()
@staticmethod
def _create_node_in_cell(ctxt, cell, hypervisor_type, nodename):
with context.target_cell(ctxt, cell) as cctxt:
cn = objects.ComputeNode(
context=cctxt,
hypervisor_type=hypervisor_type,
hypervisor_hostname=nodename,
# The rest of these values are fakes.
host=uuids.host,
vcpus=4,
memory_mb=8 * 1024,
local_gb=40,
vcpus_used=2,
memory_mb_used=2 * 1024,
local_gb_used=10,
hypervisor_version=1,
cpu_info='{"arch": "x86_64"}')
cn.create()
return cn
@staticmethod
def _create_instance_in_cell(ctxt, cell, node, is_deleted=False,
flavor_migrated=False):
with context.target_cell(ctxt, cell) as cctxt:
inst = objects.Instance(
context=cctxt,
host=node.host,
node=node.hypervisor_hostname,
uuid=uuidutils.generate_uuid())
inst.create()
if is_deleted:
inst.destroy()
else:
# Create an embedded flavor for the instance. We don't create
# this because we're in a cell context and flavors are global,
# but we don't actually care about global flavors in this
# check.
extra_specs = {}
if flavor_migrated:
extra_specs['resources:CUSTOM_BAREMETAL_GOLD'] = '1'
inst.flavor = objects.Flavor(cctxt, extra_specs=extra_specs)
inst.old_flavor = None
inst.new_flavor = None
inst.save()
return inst
def test_fresh_install_no_cell_mappings(self):
"""Tests the scenario where we don't have any cell mappings (no cells
v2 setup yet) so we don't know what state we're in and we return a
warning.
"""
result = self.cmd._check_ironic_flavor_migration()
self.assertEqual(status.UpgradeCheckCode.WARNING, result.code)
self.assertIn('Unable to determine ironic flavor migration without '
'cell mappings', result.details)
def test_fresh_install_no_computes(self):
"""Tests a fresh install scenario where we have two non-cell0 cells
but no compute nodes in either cell yet, so there is nothing to do
and we return success.
"""
self._setup_cells()
result = self.cmd._check_ironic_flavor_migration()
self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code)
def test_mixed_computes_deleted_ironic_instance(self):
"""Tests the scenario where we have a kvm compute node in one cell
and an ironic compute node in another cell. The kvm compute node does
not have any instances. The ironic compute node has an instance with
the same hypervisor_hostname match but the instance is (soft) deleted
so it's ignored.
"""
self._setup_cells()
ctxt = context.get_admin_context()
# Create the ironic compute node in cell1
ironic_node = self._create_node_in_cell(
ctxt, self.cell_mappings['cell1'], 'ironic', uuids.node_uuid)
# Create the kvm compute node in cell2
self._create_node_in_cell(
ctxt, self.cell_mappings['cell2'], 'kvm', 'fake-kvm-host')
# Now create an instance in cell1 which is on the ironic node but is
# soft deleted (instance.deleted == instance.id).
self._create_instance_in_cell(
ctxt, self.cell_mappings['cell1'], ironic_node, is_deleted=True)
result = self.cmd._check_ironic_flavor_migration()
self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code)
def test_unmigrated_ironic_instances(self):
"""Tests a scenario where we have two cells with only ironic compute
nodes. The first cell has one migrated and one unmigrated instance.
The second cell has two unmigrated instances. The result is the check
returns failure.
"""
self._setup_cells()
ctxt = context.get_admin_context()
# Create the ironic compute nodes in cell1
for x in range(2):
cell = self.cell_mappings['cell1']
ironic_node = self._create_node_in_cell(
ctxt, cell, 'ironic', getattr(uuids, 'cell1-node-%d' % x))
# Create an instance for this node. In cell1, we have one
# migrated and one unmigrated instance.
flavor_migrated = True if x % 2 else False
self._create_instance_in_cell(
ctxt, cell, ironic_node, flavor_migrated=flavor_migrated)
# Create the ironic compute nodes in cell2
for x in range(2):
cell = self.cell_mappings['cell2']
ironic_node = self._create_node_in_cell(
ctxt, cell, 'ironic', getattr(uuids, 'cell2-node-%d' % x))
# Create an instance for this node. In cell2, all instances are
# unmigrated.
self._create_instance_in_cell(
ctxt, cell, ironic_node, flavor_migrated=False)
result = self.cmd._check_ironic_flavor_migration()
self.assertEqual(status.UpgradeCheckCode.FAILURE, result.code)
# Check the message - it should point out cell1 has one unmigrated
# instance and cell2 has two unmigrated instances.
unmigrated_instance_count_by_cell = {
self.cell_mappings['cell1'].uuid: 1,
self.cell_mappings['cell2'].uuid: 2
}
self.assertIn(
'There are (cell=x) number of unmigrated instances in each '
'cell: %s.' % ' '.join('(%s=%s)' % (
cell_id, unmigrated_instance_count_by_cell[cell_id])
for cell_id in
sorted(unmigrated_instance_count_by_cell.keys())),
result.details)

View File

@ -0,0 +1,6 @@
---
upgrade:
- |
A new check is added to the ``nova-status upgrade check`` CLI which can
assist with determining if ironic instances have had their embedded flavor
migrated to use the corresponding ironic node custom resource class.