From 001f3a7bfe6b2c8af135daff8e154a708792070e Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Thu, 6 Feb 2020 09:21:38 -0800 Subject: [PATCH] Fix instance.hidden migration and querying It was discovered that default= on a Column definition in a schema migration will attempt to update the table with the provided value, instead of just translating on read, which is often the assumption. The Instance.hidden=False change introduced in Train[1] used such a default on the new column, which caused at least one real-world deployment to time out rewriting the instances table due to size. Apparently SQLAlchemy-migrate also does not consider such a timeout to be a failure and proceeds on. The end result is that some existing instances in the database have hidden=NULL values, and the DB model layer does not convert those to hidden=False when we read/query them, causing those instances to be excluded from the API list view. This change alters the 399 schema migration to remove the default=False specification. This does not actually change the schema, but /will/ prevent users who have not yet upgraded to Train from rewriting the table. This change also makes the instance_get_all_by_filters() code handle hidden specially, including false and NULL in a query for non-hidden instances. A future change should add a developer trap test to ensure that future migrations do not add default= values to new columns to avoid this situation in the future. [1] Iaffb27bd8c562ba120047c04bb62619c0864f594 Change-Id: Iace3f653b42c20887b40ee0105c8e9a4edeff1f7 Closes-Bug: #1862205 --- nova/db/sqlalchemy/api.py | 12 +++-- .../versions/399_add_instances_hidden.py | 6 ++- nova/objects/instance.py | 4 +- nova/tests/unit/objects/test_instance.py | 50 ++++++++++++++++++- ...ter_upgrade_to_train-9ce4731f31bc6bd2.yaml | 19 +++++++ 5 files changed, 85 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/instances_hidden_after_upgrade_to_train-9ce4731f31bc6bd2.yaml diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 4b9e092fb572..b3e974ab725e 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -2205,9 +2205,15 @@ def instance_get_all_by_filters_sort(context, filters, limit=None, marker=None, else: filters['user_id'] = context.user_id - if 'hidden' not in filters: - # Filter out hidden instances by default. - filters['hidden'] = False + if filters.pop('hidden', False): + query_prefix = query_prefix.filter(models.Instance.hidden == true()) + else: + # If the query should not include hidden instances, then + # filter instances with hidden=False or hidden=NULL because + # older records may have no value set. + query_prefix = query_prefix.filter(or_( + models.Instance.hidden == false(), + models.Instance.hidden == null())) # Filters for exact matches that we can do along with the SQL query... # For other filters that don't match this, we will do regexp matching diff --git a/nova/db/sqlalchemy/migrate_repo/versions/399_add_instances_hidden.py b/nova/db/sqlalchemy/migrate_repo/versions/399_add_instances_hidden.py index 1e96d0688620..aa382bab3c44 100644 --- a/nova/db/sqlalchemy/migrate_repo/versions/399_add_instances_hidden.py +++ b/nova/db/sqlalchemy/migrate_repo/versions/399_add_instances_hidden.py @@ -20,5 +20,9 @@ def upgrade(migrate_engine): for prefix in ('', 'shadow_'): instances = Table('%sinstances' % prefix, meta, autoload=True) if not hasattr(instances.c, 'hidden'): - hidden = Column('hidden', Boolean, default=False) + # NOTE(danms): This column originally included default=False. We + # discovered in bug #1862205 that this will attempt to rewrite + # the entire instances table with that value, which can time out + # for large data sets (and does not even abort). + hidden = Column('hidden', Boolean) instances.create_column(hidden) diff --git a/nova/objects/instance.py b/nova/objects/instance.py index 3233d19c79f9..324cab791154 100644 --- a/nova/objects/instance.py +++ b/nova/objects/instance.py @@ -1517,7 +1517,9 @@ class InstanceList(base.ObjectListBase, base.NovaObject): # NOTE(mriedem): Filter out hidden instances since there should be a # non-hidden version of the instance in another cell database and the # API will only show one of them, so we don't count the hidden copy. - project_query = project_query.filter_by(hidden=false()) + project_query = project_query.filter( + or_(models.Instance.hidden == false(), + models.Instance.hidden == null())) project_result = project_query.first() fields = ('instances', 'cores', 'ram') diff --git a/nova/tests/unit/objects/test_instance.py b/nova/tests/unit/objects/test_instance.py index 20584725929f..9c61fae18376 100644 --- a/nova/tests/unit/objects/test_instance.py +++ b/nova/tests/unit/objects/test_instance.py @@ -27,6 +27,8 @@ from oslo_versionedobjects import base as ovo_base from nova.compute import task_states from nova.compute import vm_states from nova.db import api as db +from nova.db.sqlalchemy import api as sql_api +from nova.db.sqlalchemy import models as sql_models from nova import exception from nova.network import model as network_model from nova import notifications @@ -1964,7 +1966,53 @@ class _TestInstanceListObject(object): class TestInstanceListObject(test_objects._LocalTest, _TestInstanceListObject): - pass + # No point in doing this db-specific test twice for remote + def test_hidden_filter_query(self): + """Check that our instance_get_by_filters() honors hidden properly + + As reported in bug #1862205, we need to properly handle instances + with the hidden field set to NULL and not expect SQLAlchemy to + translate those values on SELECT. + """ + values = {'user_id': self.context.user_id, + 'project_id': self.context.project_id, + 'host': 'foo'} + + for hidden_value in (True, False): + db.instance_create(self.context, + dict(values, hidden=hidden_value)) + + # NOTE(danms): Because the model has default=False, we can not use + # it to create an instance with a hidden value of NULL. So, do it + # manually here. + engine = sql_api.get_engine() + table = sql_models.Instance.__table__ + with engine.connect() as conn: + update = table.insert().values(user_id=self.context.user_id, + project_id=self.context.project_id, + uuid=uuids.nullinst, + host='foo', + hidden=None) + conn.execute(update) + + insts = objects.InstanceList.get_by_filters(self.context, + {'hidden': True}) + # We created one hidden instance above, so expect only that one + # to come out of this query. + self.assertEqual(1, len(insts)) + + # We created one unhidden instance above, and one specifically + # with a NULL value to represent an unmigrated instance, which + # defaults to hidden=False, so expect both of those here. + insts = objects.InstanceList.get_by_filters(self.context, + {'hidden': False}) + self.assertEqual(2, len(insts)) + + # Do the same check as above, but make sure hidden=False is the + # default behavior. + insts = objects.InstanceList.get_by_filters(self.context, + {}) + self.assertEqual(2, len(insts)) class TestRemoteInstanceListObject(test_objects._RemoteTest, diff --git a/releasenotes/notes/instances_hidden_after_upgrade_to_train-9ce4731f31bc6bd2.yaml b/releasenotes/notes/instances_hidden_after_upgrade_to_train-9ce4731f31bc6bd2.yaml new file mode 100644 index 000000000000..4bbbb1569429 --- /dev/null +++ b/releasenotes/notes/instances_hidden_after_upgrade_to_train-9ce4731f31bc6bd2.yaml @@ -0,0 +1,19 @@ +--- +upgrade: + - | + Upgrading to Train on a deployment with a large database may hit + `bug 1862205`_, which results in instance records left in a bad + state, and manifests as instances not being shown in list + operations. Users upgrading to Train for the first time will + definitely want to apply a version which includes this fix. Users + already on Train should upgrade to a version including this fix to + ensure the problem is addressed. + + .. _bug 1862205: https://launchpad.net/bugs/1862205 +fixes: + - | + A fix for serious `bug 1862205`_ is provided which addresses both + the performance aspect of schema migration 399, as well as the + potential fallout for cases where this migration silently fails + and leaves large numbers of instances hidden from view from the + API.