Add a nova-status upgrade check for old nova-osapi_compute service versions

Summary of the changes in this patch:

* nova/cmd/status.py: add UpgradeCommands._get_min_service_version() and
  UpgradeCommands._check_api_service_version(), and register the latter as
  the 'API Service Version' upgrade check. The check returns SUCCESS when
  cells v1 is enabled, WARNING when there are no cell mappings, and WARNING
  listing the cell uuids whose minimum nova-osapi_compute service version
  is less than 15.
* nova/compute/api.py: log a warning before falling back to
  Instance.get_by_uuid when the service version is < 15 and cells v1 is not
  enabled.
* Unit tests, CLI documentation and a release note for the new check.

See https://bugs.launchpad.net/nova/+bug/1759316 for details.

diff --git a/doc/source/cli/nova-status.rst b/doc/source/cli/nova-status.rst
index f82ec5f706c6..37330fbfb34f 100644
--- a/doc/source/cli/nova-status.rst
+++ b/doc/source/cli/nova-status.rst
@@ -109,6 +109,10 @@ Upgrade
   * Checks for the Placement API are modified to require version 1.21.
   * Checks that ironic instances have had their embedded flavors migrated to
     use custom resource classes.
+  * Checks for ``nova-osapi_compute`` service versions that are less than 15
+    across all cell mappings which might cause issues when querying instances
+    depending on how the **nova-api** service is configured.
+    See https://bugs.launchpad.net/nova/+bug/1759316 for details.
 
 See Also
 ========
diff --git a/nova/cmd/status.py b/nova/cmd/status.py
index 4f92c6244c0d..c321dfa2f2af 100644
--- a/nova/cmd/status.py
+++ b/nova/cmd/status.py
@@ -33,6 +33,7 @@ import pkg_resources
 import prettytable
 from sqlalchemy import func as sqlfunc
 from sqlalchemy import MetaData, Table, and_, select
+from sqlalchemy.sql import false
 
 from nova.cmd import common as cmd_common
 import nova.conf
@@ -440,6 +441,74 @@ class UpgradeCommands(object):
         # those nodes are already migrated, so there is nothing to do.
         return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
 
+    def _get_min_service_version(self, context, binary):
+        meta = MetaData(bind=db_session.get_engine(context=context))
+        services = Table('services', meta, autoload=True)
+        return select([sqlfunc.min(services.c.version)]).select_from(
+            services).where(and_(
+                services.c.binary == binary,
+                services.c.deleted == 0,
+                services.c.forced_down == false())).scalar()
+
+    def _check_api_service_version(self):
+        """Checks nova-osapi_compute service versions across cells.
+
+        For non-cellsv1 deployments, based on how the [database]/connection
+        is configured for the nova-api service, the nova-osapi_compute service
+        versions before 15 will only attempt to lookup instances from the
+        local database configured for the nova-api service directly.
+
+        This can cause issues if there are newer API service versions in cell1
+        after the upgrade to Ocata, but lingering older API service versions
+        in an older database.
+
+        This check will scan all cells looking for a minimum nova-osapi_compute
+        service version less than 15 and if found, emit a warning that those
+        service entries likely need to be cleaned up.
+        """
+        # If we're using cells v1 then we don't care about this.
+        if CONF.cells.enable:
+            return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
+
+        meta = MetaData(bind=db_session.get_api_engine())
+        cell_mappings = Table('cell_mappings', meta, autoload=True)
+        mappings = cell_mappings.select().execute().fetchall()
+
+        if not mappings:
+            # There are no cell mappings so we can't determine this, just
+            # return a warning. The cellsv2 check would have already failed
+            # on this.
+            msg = (_('Unable to determine API service versions without '
+                     'cell mappings.'))
+            return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg)
+
+        ctxt = nova_context.get_admin_context()
+        cells_with_old_api_services = []
+        for mapping in mappings:
+            with nova_context.target_cell(ctxt, mapping) as cctxt:
+                # Get the minimum nova-osapi_compute service version in this
+                # cell.
+                min_version = self._get_min_service_version(
+                    cctxt, 'nova-osapi_compute')
+                if min_version is not None and min_version < 15:
+                    cells_with_old_api_services.append(mapping['uuid'])
+
+        # If there are any cells with older API versions, we report it as a
+        # warning since we don't know how the actual nova-api service is
+        # configured, but we need to give the operator some indication that
+        # they have something to investigate/cleanup.
+        if cells_with_old_api_services:
+            msg = (_("The following cells have 'nova-osapi_compute' services "
+                     "with version < 15 which may cause issues when querying "
+                     "instances from the API: %s. Depending on how nova-api "
+                     "is configured, this may not be a problem, but is worth "
+                     "investigating and potentially cleaning up those older "
+                     "records. See "
+                     "https://bugs.launchpad.net/nova/+bug/1759316 for "
+                     "details.") % ', '.join(cells_with_old_api_services))
+            return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg)
+        return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
+
     # The format of the check functions is to return an UpgradeCheckResult
     # object with the appropriate UpgradeCheckCode and details set. If the
     # check hits warnings or failures then those should be stored in the
@@ -455,7 +524,9 @@ class UpgradeCommands(object):
         # Added in Ocata
         (_('Resource Providers'), _check_resource_providers),
         # Added in Rocky (but also useful going back to Pike)
-        (_('Ironic Flavor Migration'), _check_ironic_flavor_migration)
+        (_('Ironic Flavor Migration'), _check_ironic_flavor_migration),
+        # Added in Rocky (but is backportable to Ocata)
+        (_('API Service Version'), _check_api_service_version)
     )
 
     def _get_details(self, upgrade_check_result):
diff --git a/nova/compute/api.py b/nova/compute/api.py
index 6594c29981b6..bdae727d0338 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -2269,6 +2269,22 @@ class API(base.Base):
         # merged replica instead of the cell directly, so fall through
         # here in that case as well.
         if service_version < 15 or CONF.cells.enable:
+            # If not using cells v1, we need to log a warning about the API
+            # service version being less than 15 (that check was added in
+            # newton), which indicates there is some lingering data during the
+            # transition to cells v2 which could cause an InstanceNotFound
+            # here. The warning message is a sort of breadcrumb.
+            # This can all go away once we drop cells v1 and assert that all
+            # deployments have upgraded from a base cells v2 setup with
+            # mappings.
+            if not CONF.cells.enable:
+                LOG.warning('The nova-osapi_compute service version is from '
+                            'before Ocata and may cause problems looking up '
+                            'instances in a cells v2 setup. Check your '
+                            'nova-api service configuration and cell '
+                            'mappings. You may need to remove stale '
+                            'nova-osapi_compute service records from the cell '
+                            'database.')
             return objects.Instance.get_by_uuid(context, instance_uuid,
                                                 expected_attrs=expected_attrs)
         inst_map = self._get_instance_map_or_none(context, instance_uuid)
diff --git a/nova/tests/unit/cmd/test_status.py b/nova/tests/unit/cmd/test_status.py
index ec74f674bccf..367a72663a13 100644
--- a/nova/tests/unit/cmd/test_status.py
+++ b/nova/tests/unit/cmd/test_status.py
@@ -813,3 +813,107 @@ class TestUpgradeCheckIronicFlavorMigration(test.NoDBTestCase):
                     for cell_id in
                     sorted(unmigrated_instance_count_by_cell.keys())),
             result.details)
+
+
+class TestUpgradeCheckAPIServiceVersion(test.NoDBTestCase):
+    """Tests for the nova-status upgrade API service version specific check."""
+
+    # We'll setup the database ourselves because we need to use cells fixtures
+    # for multiple cell mappings.
+    USES_DB_SELF = True
+
+    # This will create three cell mappings: cell0, cell1 (default) and cell2
+    NUMBER_OF_CELLS = 2
+
+    def setUp(self):
+        super(TestUpgradeCheckAPIServiceVersion, self).setUp()
+        self.output = StringIO()
+        self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))
+        self.useFixture(nova_fixtures.Database(database='api'))
+        self.cmd = status.UpgradeCommands()
+
+    def test_check_cells_v1_enabled(self):
+        """This is a 'success' case since the API service version check is
+        ignored when running cells v1.
+        """
+        self.flags(enable=True, group='cells')
+        result = self.cmd._check_api_service_version()
+        self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code)
+
+    def test_check_no_cell_mappings_warning(self):
+        """Warn when there are no cell mappings."""
+        result = self.cmd._check_api_service_version()
+        self.assertEqual(status.UpgradeCheckCode.WARNING, result.code)
+        self.assertEqual('Unable to determine API service versions without '
+                         'cell mappings.', result.details)
+
+    @staticmethod
+    def _create_service(ctxt, host, binary, version):
+        svc = objects.Service(ctxt, host=host, binary=binary)
+        svc.version = version
+        svc.create()
+        return svc
+
+    def test_check_warning(self):
+        """This is a failure scenario where we have the following setup:
+
+        Three cells where:
+
+        1. The first cell has two API services, one with version < 15 and one
+           with version >= 15.
+        2. The second cell has two services, one with version < 15 but it's
+           deleted so it gets filtered out, and one with version >= 15.
+        3. The third cell doesn't have any API services, just old compute
+           services which should be filtered out.
+
+        In this scenario, the first cell should be reported with a warning.
+        """
+        self._setup_cells()
+        ctxt = context.get_admin_context()
+        cell0 = self.cell_mappings['cell0']
+        with context.target_cell(ctxt, cell0) as cctxt:
+            self._create_service(cctxt, host='cell0host1',
+                                 binary='nova-osapi_compute', version=14)
+            self._create_service(cctxt, host='cell0host2',
+                                 binary='nova-osapi_compute', version=15)
+
+        cell1 = self.cell_mappings['cell1']
+        with context.target_cell(ctxt, cell1) as cctxt:
+            svc = self._create_service(
+                cctxt, host='cell1host1', binary='nova-osapi_compute',
+                version=14)
+            # This deleted record with the old version should get filtered out.
+            svc.destroy()
+            self._create_service(cctxt, host='cell1host2',
+                                 binary='nova-osapi_compute', version=16)
+
+        cell2 = self.cell_mappings['cell2']
+        with context.target_cell(ctxt, cell2) as cctxt:
+            self._create_service(cctxt, host='cell2host1',
+                                 binary='nova-compute', version=14)
+
+        result = self.cmd._check_api_service_version()
+        self.assertEqual(status.UpgradeCheckCode.WARNING, result.code)
+        # The only cell in the message should be cell0.
+        self.assertIn(cell0.uuid, result.details)
+        self.assertNotIn(cell1.uuid, result.details)
+        self.assertNotIn(cell2.uuid, result.details)
+
+    def test_check_success(self):
+        """Tests the success scenario where we have cell0 with a current API
+        service, cell1 with no API services, and an empty cell2.
+        """
+        self._setup_cells()
+        ctxt = context.get_admin_context()
+        cell0 = self.cell_mappings['cell0']
+        with context.target_cell(ctxt, cell0) as cctxt:
+            self._create_service(cctxt, host='cell0host1',
+                                 binary='nova-osapi_compute', version=15)
+
+        cell1 = self.cell_mappings['cell1']
+        with context.target_cell(ctxt, cell1) as cctxt:
+            self._create_service(cctxt, host='cell1host1',
+                                 binary='nova-compute', version=15)
+
+        result = self.cmd._check_api_service_version()
+        self.assertEqual(status.UpgradeCheckCode.SUCCESS, result.code)
diff --git a/releasenotes/notes/bug-1759316-nova-status-api-version-check-183fac0525bfd68c.yaml b/releasenotes/notes/bug-1759316-nova-status-api-version-check-183fac0525bfd68c.yaml
new file mode 100644
index 000000000000..a7c22bc4d4ba
--- /dev/null
+++ b/releasenotes/notes/bug-1759316-nova-status-api-version-check-183fac0525bfd68c.yaml
@@ -0,0 +1,13 @@
+---
+upgrade:
+  - |
+    A new check is added to ``nova-status upgrade check`` which will scan
+    all cells looking for ``nova-osapi_compute`` service versions which are
+    from before Ocata and which may cause issues with how the compute API
+    finds instances. This will result in a warning if:
+
+    * No cell mappings are found
+    * The minimum ``nova-osapi_compute`` service version is less than 15 in
+      any given cell
+
+    See https://bugs.launchpad.net/nova/+bug/1759316 for more details.