From 4540cd6ef9731837cf4ed1e56850bcfa3512ae1c Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Mon, 13 May 2019 16:04:39 +0100 Subject: [PATCH] Fix retry of instance_update_and_get_original _instance_update modifies its 'values' argument. Consequently if it is retried due to an update conflict, the second invocation has the wrong arguments. A specific issue this causes is that if we called it with expected_task_state a concurrent modification to task_state will cause us to fail and retry. However, expected_task_state will have been popped from values on the first invocation and will not be present for the second. Consequently the second invocation will fail to perform the task_state check and therefore succeed, resulting in a race. We rewrite the old race unit test which wasn't testing the correct thing for 2 reasons: 1. Due to the bug fixed in this patch, although we were calling update_on_match() twice, the second call didn't check the task state. 2. side_effect=iterable returns function items without executing them, but we weren't hitting this due to the bug fixed in this patch. Closes-Bug: #1821373 Change-Id: I01c63e685113bf30e687ccb14a4d18e344b306f6 (cherry picked from commit aae5c7aa3819ad9161fd2effed3872d540099230) (cherry picked from commit 61fef49b1555c6509398fc47c21319c1b5e50505) --- nova/db/sqlalchemy/api.py | 21 +++++++----- nova/tests/unit/db/test_db_api.py | 56 ++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 37d1beed29a7..5cf77cb2be80 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -2747,6 +2747,11 @@ def _instance_update(context, instance_uuid, values, expected, original=None): if not uuidutils.is_uuid_like(instance_uuid): raise exception.InvalidUUID(instance_uuid) + # NOTE(mdbooth): We pop values from this dict below, so we copy it here to + # ensure there are no side effects for the caller or if we retry the + # function due to a db conflict. + updates = copy.copy(values) + if expected is None: expected = {} else: @@ -2758,8 +2763,8 @@ def _instance_update(context, instance_uuid, values, expected, original=None): # updates for field in ('task_state', 'vm_state'): expected_field = 'expected_%s' % field - if expected_field in values: - value = values.pop(expected_field, None) + if expected_field in updates: + value = updates.pop(expected_field, None) # Coerce all single values to singleton lists if value is None: expected[field] = [None] @@ -2767,23 +2772,23 @@ def _instance_update(context, instance_uuid, values, expected, original=None): expected[field] = sqlalchemyutils.to_list(value) # Values which need to be updated separately - metadata = values.pop('metadata', None) - system_metadata = values.pop('system_metadata', None) + metadata = updates.pop('metadata', None) + system_metadata = updates.pop('system_metadata', None) - _handle_objects_related_type_conversions(values) + _handle_objects_related_type_conversions(updates) # Hostname is potentially unique, but this is enforced in code rather # than the DB. The query below races, but the number of users of # osapi_compute_unique_server_name_scope is small, and a robust fix # will be complex. This is intentionally left as is for the moment. - if 'hostname' in values: - _validate_unique_server_name(context, values['hostname']) + if 'hostname' in updates: + _validate_unique_server_name(context, updates['hostname']) compare = models.Instance(uuid=instance_uuid, **expected) try: instance_ref = model_query(context, models.Instance, project_only=True).\ - update_on_match(compare, 'uuid', values) + update_on_match(compare, 'uuid', updates) except update_match.NoRowsMatched: # Update failed. Try to find why and raise a specific error. diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 0433f126b1ba..e75d5e356ddc 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -44,6 +44,7 @@ from sqlalchemy import inspect from sqlalchemy import Integer from sqlalchemy import MetaData from sqlalchemy.orm import query +from sqlalchemy.orm import session as sqla_session from sqlalchemy import sql from sqlalchemy import Table @@ -2558,21 +2559,50 @@ class InstanceTestCase(test.TestCase, ModelsObjectComparatorMixin): test(self.ctxt) def test_instance_update_and_get_original_conflict_race(self): - # Ensure that we retry if update_on_match fails for no discernable - # reason - instance = self.create_instance_with_args() + # Ensure that we correctly process expected_task_state when retrying + # due to an unknown conflict - orig_update_on_match = update_match.update_on_match + # This requires modelling the MySQL read view, which means that if we + # have read something in the current transaction and we read it again, + # we will read the same data every time even if another committed + # transaction has since altered that data. In this test we have an + # instance whose task state was originally None, but has been set to + # SHELVING by another, concurrent transaction. Therefore the first time + # we read the data we will read None, but when we restart the + # transaction we will read the correct data. - # Reproduce the conditions of a race between fetching and updating the - # instance by making update_on_match fail for no discernable reason the - # first time it is called, but work normally the second time. - with mock.patch.object(update_match, 'update_on_match', - side_effect=[update_match.NoRowsMatched, - orig_update_on_match]): - db.instance_update_and_get_original( - self.ctxt, instance['uuid'], {'metadata': {'mk1': 'mv3'}}) - self.assertEqual(update_match.update_on_match.call_count, 2) + instance = self.create_instance_with_args( + task_state=task_states.SHELVING) + + instance_out_of_date = copy.copy(instance) + instance_out_of_date['task_state'] = None + + # NOTE(mdbooth): SQLA magic which makes this dirty object look + # like a freshly loaded one. + sqla_session.make_transient(instance_out_of_date) + sqla_session.make_transient_to_detached(instance_out_of_date) + + # update_on_match will fail first time because the actual task state + # (SHELVING) doesn't match the expected task state (None). However, + # we ensure that the first time we fetch the instance object we get + # out-of-date data. This forces us to retry the operation to find out + # what really went wrong. + with mock.patch.object(sqlalchemy_api, '_instance_get_by_uuid', + side_effect=[instance_out_of_date, instance]), \ + mock.patch.object(sqlalchemy_api, '_instance_update', + side_effect=sqlalchemy_api._instance_update): + self.assertRaises(exception.UnexpectedTaskStateError, + db.instance_update_and_get_original, + self.ctxt, instance['uuid'], + {'expected_task_state': [None]}) + sqlalchemy_api._instance_update.assert_has_calls([ + mock.call(self.ctxt, instance['uuid'], + {'expected_task_state': [None]}, None, + original=instance_out_of_date), + mock.call(self.ctxt, instance['uuid'], + {'expected_task_state': [None]}, None, + original=instance), + ]) def test_instance_update_and_get_original_conflict_race_fallthrough(self): # Ensure that is update_match continuously fails for no discernable