Fix DB deadlock in quotas

There's a potential deadlock when creating the first LB in a new
project. The query that creates an empty quota object for the project
may have triggered a deadlock in the DB, this deadlock should have been
handlded by an oslo_db.api.wrab_db_retry decorator but the decorated
function was incorrect called: the DB session must begin in the
decorated function and not in the caller.
If begin is called by the caller, in case of retry sqlalchemy will
complain that the session was not correctly restarted after a rollback.

Closes-Bug: #2038798
Change-Id: Iec1f8f8f61d77640d4a33d7ffb62925f7e860286
This commit is contained in:
Gregory Thiemonge
2023-10-31 05:59:44 -04:00
parent 2a1c4709a0
commit 333f035ed2
3 changed files with 68 additions and 14 deletions
+16 -12
View File
@@ -40,6 +40,7 @@ from octavia.common import constants as consts
from octavia.common import data_models
from octavia.common import exceptions
from octavia.common import validate
from octavia.db import api as db_api
from octavia.db import models
CONF = cfg.CONF
@@ -388,13 +389,8 @@ class Repositories(object):
if not project_id:
raise exceptions.MissingProjectID()
quotas = self.quotas.get(session, project_id=project_id)
if not quotas:
# Make sure we have a record to lock
self.quotas.update(
session,
project_id,
quota={})
self.quotas.ensure_project_exists(project_id)
# Lock the project record in the database to block other quota checks
#
# Note: You cannot just use the current count as the in-use
@@ -1881,11 +1877,6 @@ class L7PolicyRepository(BaseRepository):
class QuotasRepository(BaseRepository):
model_class = models.Quotas
# Since this is for the initial quota record creation it locks the table
# which can lead to recoverable deadlocks. Thus we use the deadlock
# retry wrapper here. This may not be appropriate for other sessions
# and or queries. Use with caution.
@oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
def update(self, session, project_id, **model_kwargs):
kwargs_quota = model_kwargs['quota']
quotas = (
@@ -1902,6 +1893,19 @@ class QuotasRepository(BaseRepository):
session.flush()
return self.get(session, project_id=project_id)
# Since this is for the initial quota record creation it locks the table
# which can lead to recoverable deadlocks. Thus we use the deadlock
# retry wrapper here. This may not be appropriate for other sessions
# and or queries. Use with caution.
@oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
def ensure_project_exists(self, project_id):
with db_api.session().begin() as session:
quotas = self.get(session, project_id=project_id)
if not quotas:
# Make sure we have a record to lock
self.update(session, project_id, quota={})
session.commit()
def delete(self, session, project_id):
quotas = (
session.query(self.model_class)
+9
View File
@@ -30,6 +30,8 @@ from octavia.tests import fixtures as oc_fixtures
class OctaviaDBTestBase(test_base.BaseTestCase):
facade = None
def setUp(self, connection_string='sqlite://'):
super().setUp()
@@ -73,11 +75,18 @@ class OctaviaDBTestBase(test_base.BaseTestCase):
sqlite_fk=True)
engine = facade.get_engine()
session = facade.get_session(expire_on_commit=True)
self.facade = facade
else:
engine = db_api.get_engine()
session = db_api.get_session()
return engine, session
def get_session(self):
if 'sqlite:///' in self.connection_string:
return self.facade.get_session(expire_on_commit=True)
else:
return db_api.get_session()
def _seed_lookup_tables(self, session):
self._seed_lookup_table(
session, constants.SUPPORTED_PROVISIONING_STATUSES,
@@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
from concurrent.futures import ThreadPoolExecutor
import datetime
import random
from unittest import mock
@@ -509,6 +510,47 @@ class AllRepositoriesTest(base.OctaviaDBTestBase):
project_id=project_id).all()
self.assertEqual(1, len(lbs)) # After rollback: 1 (broken!)
def test_check_quota_met_check_deadlock(self):
# This test doesn't work with sqlite, using another backend is not
# straighforward, we need to update the connection_string passed to the
# __init__ func and also change some calls in the constructor (don't
# create the DB objects if we use a DB that was deployed for Octavia)
if 'sqlite://' in self.connection_string:
self.skipTest("The test for checking potential deadlocks "
"doesn't work with the sqlite backend")
conf = self.useFixture(oslo_fixture.Config(cfg.CONF))
conf.config(group='api_settings', auth_strategy=constants.TESTING)
conf.config(group='quotas', default_load_balancer_quota=-1)
# Calling check_quota_met concurrently from many threads may
# have triggered a deadlock in the DB
# (Note: we run the test 8 times because it's not 100% reproducible)
# https://bugs.launchpad.net/octavia/+bug/2038798
for _ in range(8):
number_of_projects = 8
project_ids = (
uuidutils.generate_uuid()
for _ in range(number_of_projects))
with ThreadPoolExecutor(
max_workers=number_of_projects) as executor:
def _test_check_quota_met(project_id):
session = self.get_session()
session.begin()
self.assertFalse(self.repos.check_quota_met(
session, data_models.LoadBalancer,
project_id))
session.commit()
futs = []
for project_id in project_ids:
future = executor.submit(_test_check_quota_met, project_id)
futs.append(future)
for fut in futs:
fut.result()
def test_check_quota_met(self):
project_id = uuidutils.generate_uuid()
@@ -538,10 +580,9 @@ class AllRepositoriesTest(base.OctaviaDBTestBase):
# Test DB deadlock case
project_id = uuidutils.generate_uuid()
mock_quotas = mock.MagicMock()
mock_session = mock.MagicMock()
mock_session.query = mock.MagicMock(
side_effect=[mock_quotas, db_exception.DBDeadlock])
side_effect=db_exception.DBDeadlock)
self.assertRaises(exceptions.ProjectBusyException,
self.repos.check_quota_met,
mock_session,