Retry update_user when sqlalchemy raises StaleDataErrors

Keystone's update_user() method in the SQL driver processes a lot of information about how to update users. This includes evaluating password logic and authentication attempts for PSI-DSS. This logic is evaluated after keystone pulls the user record from SQL and before it exits the context manager, which performs the write. When multiple clients are all updating the same user reference, it's more likely they will see an HTTP 500 because of race conditions exiting the context manager. The HTTP 500 is due to stale data when updating password expiration for old passwords, which happens when setting a new password for a user. This commit attempts to handle that case more gracefully than throwing a 500 by detecting StaleDataErrors from sqlalchemy and retrying. The identity sql backend will retry the request for clients that have stale data change from underneath them. Change-Id: I75590c20e90170ed862f46f0de7d61c7810b5c90 Closes-Bug: 1885753 (cherry picked from commit ceae3566e8) (cherry picked from commit f47e635b80) (cherry picked from commit 5b7d4c80d4)
2020-06-30 11:50:41 -05:00 · 2020-06-30 11:50:41 -05:00 · 07d3a3d3ff
parent cbfaab84a8
commit 07d3a3d3ff
3 changed files with 52 additions and 0 deletions
--- a/keystone/identity/backends/sql.py
+++ b/keystone/identity/backends/sql.py
@ -32,6 +32,10 @@ from keystone.identity.backends import sql_model as model
 CONF = keystone.conf.CONF


+def _stale_data_exception_checker(exc):
+    return isinstance(exc, sqlalchemy.orm.exc.StaleDataError)
+
+
 class Identity(base.IdentityDriverBase):
    # NOTE(henry-nash): Override the __init__() method so as to take a
    # config parameter to enable sql to be used as a domain-specific driver.
@ -208,6 +212,10 @@ class Identity(base.IdentityDriverBase):
            return base.filter_user(user_ref.to_dict())

    @sql.handle_conflicts(conflict_type='user')
+    # Explicitly retry on StaleDataErrors, which can happen if two clients
+    # update the same user's password and the second client has stale password
+    # information.
+    @oslo_db_api.wrap_db_retry(exception_checker=_stale_data_exception_checker)
    def update_user(self, user_id, user):
        with sql.session_for_write() as session:
            user_ref = self._get_user(session, user_id)
--- a/keystone/tests/unit/test_backend_sql.py
+++ b/keystone/tests/unit/test_backend_sql.py
@ -16,9 +16,11 @@ import datetime
 from unittest import mock
 import uuid

+import fixtures
 import freezegun
 from oslo_db import exception as db_exception
 from oslo_db import options
+from oslo_log import log
 import sqlalchemy
 from sqlalchemy import exc
 from testtools import matchers
@ -983,6 +985,42 @@ class SqlIdentity(SqlTests,
                          PROVIDERS.resource_api.check_project_depth,
                          2)

+    def test_update_user_with_stale_data_forces_retry(self):
+        # Capture log output so we know oslo.db attempted a retry
+        log_fixture = self.useFixture(fixtures.FakeLogger(level=log.DEBUG))
+
+        # Create a new user
+        user_dict = unit.new_user_ref(
+            domain_id=CONF.identity.default_domain_id)
+        new_user_dict = PROVIDERS.identity_api.create_user(user_dict)
+
+        side_effects = [
+            # Raise a StaleDataError simulating that another client has
+            # updated the user's password while this client's request was
+            # being processed
+            sqlalchemy.orm.exc.StaleDataError,
+            # The oslo.db library will retry the request, so the second
+            # time this method is called let's return a valid session
+            # object
+            sql.session_for_write()
+        ]
+        with mock.patch('keystone.common.sql.session_for_write') as m:
+            m.side_effect = side_effects
+
+            # Update a user's attribute, the first attempt will fail but
+            # oslo.db will handle the exception and retry, the second attempt
+            # will succeed
+            new_user_dict['email'] = uuid.uuid4().hex
+            PROVIDERS.identity_api.update_user(
+                new_user_dict['id'], new_user_dict)
+
+        # Make sure oslo.db retried the update by checking the log output
+        expected_log_message = (
+            'Performing DB retry for function keystone.identity.backends.'
+            'sql.Identity.update_user'
+        )
+        self.assertIn(expected_log_message, log_fixture.output)
+

 class SqlTrust(SqlTests, trust_tests.TrustTests):

--- a/releasenotes/notes/bug-1885753-51df25f3ff1d9ae8.yaml
+++ b/releasenotes/notes/bug-1885753-51df25f3ff1d9ae8.yaml
@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    [`bug 1885753 <https://bugs.launchpad.net/keystone/+bug/1885753>`_]
+    Keystone's SQL identity backend now retries update user requests to safely
+    handle stale data when two clients update a user at the same time.