Log a warning and add nova-status check for old API service versions
Change Ib984c30543acb3ca9cb95fb53d44d9ded0f5a5c8, which was added in Newton when cells v2 was optional, added some transitional code to the API for looking up an instance, which didn't rely on instance mappings in a cell to find the instance if the minimum nova-osapi_compute service version was from before Ocata. People have reported this being a source of confusion when upgrading from before Ocata, when cells v2 wasn't required, to Ocata+ where cells v2 along with the mapping setup is required. That's because they might have older nova-osapi_compute service version records in their 'nova' (cell) database, which makes the API think the code is older than it actually is and results in an InstanceNotFound error. This change does two things: 1. Adds a warning to the compute API code in this scenario to serve as a breadcrumb if a deployment hits this issue. 2. Adds a nova-status check that looks for minimum nova-osapi_compute service versions across all cells and reports the issue as a warning. It is not reported as an upgrade failure since we don't know how the nova-api service is configured; that investigation is left up to the deployer. This is also written in such a way that we should be able to backport this through to stable/ocata. Change-Id: Ie2bc4616439352850cf29a9de7d33a06c8f7c2b8 Closes-Bug: #1759316
This commit is contained in:
parent
e2d5dc4e2c
commit
eaf6340847
@ -109,6 +109,10 @@ Upgrade
|
||||
* Checks for the Placement API are modified to require version 1.21.
|
||||
* Checks that ironic instances have had their embedded flavors migrated to
|
||||
use custom resource classes.
|
||||
* Checks for ``nova-osapi_compute`` service versions that are less than 15
|
||||
across all cell mappings which might cause issues when querying instances
|
||||
depending on how the **nova-api** service is configured.
|
||||
See https://bugs.launchpad.net/nova/+bug/1759316 for details.
|
||||
|
||||
See Also
|
||||
========
|
||||
|
@ -33,6 +33,7 @@ import pkg_resources
|
||||
import prettytable
|
||||
from sqlalchemy import func as sqlfunc
|
||||
from sqlalchemy import MetaData, Table, and_, select
|
||||
from sqlalchemy.sql import false
|
||||
|
||||
from nova.cmd import common as cmd_common
|
||||
import nova.conf
|
||||
@ -440,6 +441,74 @@ class UpgradeCommands(object):
|
||||
# those nodes are already migrated, so there is nothing to do.
|
||||
return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
|
||||
|
||||
def _get_min_service_version(self, context, binary):
    """Return the lowest service version recorded for ``binary``.

    The ``services`` table is reflected from the database engine that
    belongs to ``context`` and queried for the minimum ``version`` among
    rows that match ``binary``, are not deleted (``deleted == 0``) and are
    not forced down.

    :param context: request context used to look up the database engine
    :param binary: value to match against the ``binary`` column, e.g.
        ``'nova-osapi_compute'``
    :returns: the scalar result of the ``MIN(version)`` query; callers in
        this file treat ``None`` as "no matching service records"
    """
    engine = db_session.get_engine(context=context)
    services = Table('services', MetaData(bind=engine), autoload=True)

    # Only live records count: skip soft-deleted rows and services that
    # have been forced down.
    live_records_for_binary = and_(
        services.c.binary == binary,
        services.c.deleted == 0,
        services.c.forced_down == false())

    min_version_query = select([sqlfunc.min(services.c.version)])
    min_version_query = min_version_query.select_from(services)
    min_version_query = min_version_query.where(live_records_for_binary)
    return min_version_query.scalar()
|
||||
|
||||
def _check_api_service_version(self):
    """Look for pre-Ocata nova-osapi_compute service records in any cell.

    When cells v1 is not in use, a nova-osapi_compute service version
    lower than 15 makes the API look instances up only in the local
    database configured for the nova-api service, depending on how its
    [database]/connection option is set. Lingering old API service
    records in an older database can therefore cause problems even though
    newer API services exist in cell1 after the upgrade to Ocata.

    Every cell mapping is scanned for its minimum nova-osapi_compute
    service version. Cells whose minimum is below 15 are reported in a
    warning, since those service entries likely need to be cleaned up.

    :returns: an UpgradeCheckResult which is SUCCESS when cells v1 is
        enabled or no cell has an old API service record, and WARNING
        when there are no cell mappings or at least one cell has a
        nova-osapi_compute service version lower than 15
    """
    # Cells v1 deployments are not affected, so there is nothing to check.
    if CONF.cells.enable:
        return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)

    api_meta = MetaData(bind=db_session.get_api_engine())
    cell_mappings_table = Table('cell_mappings', api_meta, autoload=True)
    all_mappings = cell_mappings_table.select().execute().fetchall()

    # Without cell mappings there is nothing to scan, so just warn. The
    # cellsv2 check would have already failed on this.
    if not all_mappings:
        details = (_('Unable to determine API service versions without '
                     'cell mappings.'))
        return UpgradeCheckResult(UpgradeCheckCode.WARNING, details)

    admin_ctxt = nova_context.get_admin_context()
    stale_cell_uuids = []
    for cell in all_mappings:
        with nova_context.target_cell(admin_ctxt, cell) as cell_ctxt:
            # Oldest nova-osapi_compute service version in this cell, or
            # None when the cell has no matching service records.
            oldest = self._get_min_service_version(
                cell_ctxt, 'nova-osapi_compute')
            if oldest is None or oldest >= 15:
                continue
            stale_cell_uuids.append(cell['uuid'])

    if not stale_cell_uuids:
        return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)

    # Old API service versions are only reported as a warning because we
    # don't know how the actual nova-api service is configured, but the
    # operator needs some indication that there is something to
    # investigate/cleanup.
    details = (_("The following cells have 'nova-osapi_compute' services "
                 "with version < 15 which may cause issues when querying "
                 "instances from the API: %s. Depending on how nova-api "
                 "is configured, this may not be a problem, but is worth "
                 "investigating and potentially cleaning up those older "
                 "records. See "
                 "https://bugs.launchpad.net/nova/+bug/1759316 for "
                 "details.") % ', '.join(stale_cell_uuids))
    return UpgradeCheckResult(UpgradeCheckCode.WARNING, details)
|
||||
|
||||
# The format of the check functions is to return an UpgradeCheckResult
|
||||
# object with the appropriate UpgradeCheckCode and details set. If the
|
||||
# check hits warnings or failures then those should be stored in the
|
||||
@ -455,7 +524,9 @@ class UpgradeCommands(object):
|
||||
# Added in Ocata
|
||||
(_('Resource Providers'), _check_resource_providers),
|
||||
# Added in Rocky (but also useful going back to Pike)
|
||||
(_('Ironic Flavor Migration'), _check_ironic_flavor_migration)
|
||||
(_('Ironic Flavor Migration'), _check_ironic_flavor_migration),
|
||||
# Added in Rocky (but is backportable to Ocata)
|
||||
(_('API Service Version'), _check_api_service_version)
|
||||
)
|
||||
|
||||
def _get_details(self, upgrade_check_result):
|
||||
|
@ -2269,6 +2269,22 @@ class API(base.Base):
|
||||
# merged replica instead of the cell directly, so fall through
|
||||
# here in that case as well.
|
||||
if service_version < 15 or CONF.cells.enable:
|
||||
# If not using cells v1, we need to log a warning about the API
|
||||
# service version being less than 15 (that check was added in
|
||||
# newton), which indicates there is some lingering data during the
|
||||
# transition to cells v2 which could cause an InstanceNotFound
|
||||
# here. The warning message is a sort of breadcrumb.
|
||||
# This can all go away once we drop cells v1 and assert that all
|
||||
# deployments have upgraded from a base cells v2 setup with
|
||||
# mappings.
|
||||
if not CONF.cells.enable:
|
||||
LOG.warning('The nova-osapi_compute service version is from '
|
||||
'before Ocata and may cause problems looking up '
|
||||
'instances in a cells v2 setup. Check your '
|
||||
'nova-api service configuration and cell '
|
||||
'mappings. You may need to remove stale '
|
||||
'nova-osapi_compute service records from the cell '
|
||||
'database.')
|
||||
return objects.Instance.get_by_uuid(context, instance_uuid,
|
||||
expected_attrs=expected_attrs)
|
||||
inst_map = self._get_instance_map_or_none(context, instance_uuid)
|
||||
|
@ -813,3 +813,107 @@ class TestUpgradeCheckIronicFlavorMigration(test.NoDBTestCase):
|
||||
for cell_id in
|
||||
sorted(unmigrated_instance_count_by_cell.keys())),
|
||||
result.details)
|
||||
|
||||
|
||||
class TestUpgradeCheckAPIServiceVersion(test.NoDBTestCase):
    """Exercises the nova-status upgrade check for API service versions."""

    # The database is set up by this test case itself because the cells
    # fixtures are needed to get multiple cell mappings.
    USES_DB_SELF = True

    # This will create three cell mappings: cell0, cell1 (default) and cell2
    NUMBER_OF_CELLS = 2

    def setUp(self):
        super(TestUpgradeCheckAPIServiceVersion, self).setUp()
        # Redirect stdout into a buffer held on the test case.
        self.output = StringIO()
        stdout_patch = fixtures.MonkeyPatch('sys.stdout', self.output)
        self.useFixture(stdout_patch)
        api_database = nova_fixtures.Database(database='api')
        self.useFixture(api_database)
        self.cmd = status.UpgradeCommands()

    def test_check_cells_v1_enabled(self):
        """With cells v1 enabled the check is skipped and reports success."""
        self.flags(enable=True, group='cells')
        outcome = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.SUCCESS, outcome.code)

    def test_check_no_cell_mappings_warning(self):
        """A warning is reported when no cell mappings exist."""
        expected_details = ('Unable to determine API service versions '
                            'without cell mappings.')
        outcome = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.WARNING, outcome.code)
        self.assertEqual(expected_details, outcome.details)

    @staticmethod
    def _create_service(ctxt, host, binary, version):
        """Create and return a Service record with the given version."""
        service = objects.Service(ctxt, host=host, binary=binary)
        # NOTE(review): the version is assigned after construction rather
        # than passed to the constructor; presumably the constructor would
        # not keep a caller-supplied version - confirm before changing.
        service.version = version
        service.create()
        return service

    def test_check_warning(self):
        """Warning scenario spread over three cells:

        1. cell0 has two API services, one with version < 15 and one with
           version >= 15.
        2. cell1 has two API services, one with version < 15 which is
           deleted and so gets filtered out, and one with version >= 15.
        3. cell2 has no API services at all, only an old compute service
           which should be filtered out.

        Only cell0 should be named in the resulting warning.
        """
        self._setup_cells()
        api_binary = 'nova-osapi_compute'
        admin_ctxt = context.get_admin_context()

        cell0 = self.cell_mappings['cell0']
        with context.target_cell(admin_ctxt, cell0) as cell_ctxt:
            for host, version in (('cell0host1', 14), ('cell0host2', 15)):
                self._create_service(cell_ctxt, host=host,
                                     binary=api_binary, version=version)

        cell1 = self.cell_mappings['cell1']
        with context.target_cell(admin_ctxt, cell1) as cell_ctxt:
            stale_service = self._create_service(
                cell_ctxt, host='cell1host1', binary=api_binary,
                version=14)
            # Once deleted, this old-version record should be filtered out.
            stale_service.destroy()
            self._create_service(cell_ctxt, host='cell1host2',
                                 binary=api_binary, version=16)

        cell2 = self.cell_mappings['cell2']
        with context.target_cell(admin_ctxt, cell2) as cell_ctxt:
            self._create_service(cell_ctxt, host='cell2host1',
                                 binary='nova-compute', version=14)

        outcome = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.WARNING, outcome.code)
        # The only cell in the message should be cell0.
        self.assertIn(cell0.uuid, outcome.details)
        for unaffected_cell in (cell1, cell2):
            self.assertNotIn(unaffected_cell.uuid, outcome.details)

    def test_check_success(self):
        """Success scenario: cell0 has a current API service, cell1 has no
        API services (only a compute service) and cell2 is empty.
        """
        self._setup_cells()
        admin_ctxt = context.get_admin_context()
        # One service per cell as (cell name, host, binary); each is created
        # with version 15.
        services_by_cell = (
            ('cell0', 'cell0host1', 'nova-osapi_compute'),
            ('cell1', 'cell1host1', 'nova-compute'),
        )
        for cell_name, host, binary in services_by_cell:
            mapping = self.cell_mappings[cell_name]
            with context.target_cell(admin_ctxt, mapping) as cell_ctxt:
                self._create_service(cell_ctxt, host=host,
                                     binary=binary, version=15)

        outcome = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.SUCCESS, outcome.code)
|
||||
|
@ -0,0 +1,13 @@
|
||||
---
|
||||
upgrade:
|
||||
- |
|
||||
A new check is added to ``nova-status upgrade check`` which will scan
|
||||
all cells looking for ``nova-osapi_compute`` service versions which are
|
||||
from before Ocata and which may cause issues with how the compute API
|
||||
finds instances. This will result in a warning if:
|
||||
|
||||
* No cell mappings are found
|
||||
* The minimum ``nova-osapi_compute`` service version is less than 15 in
|
||||
any given cell
|
||||
|
||||
See https://bugs.launchpad.net/nova/+bug/1759316 for more details.
|
Loading…
Reference in New Issue
Block a user