Cache database and message queue connection objects

Recently in the gate we have seen a trace on some work-in-progress
patches:

  OperationalError: (pymysql.err.OperationalError)
                    (1040, u'Too many connections')

and at least one operator has reported that the number of database
connections increased significantly going from Mitaka to Newton.

It was suspected that the increase was caused by creating new oslo.db
transaction context managers on-the-fly when switching database
connections for cells. Comparing the dstat --tcp output of runs of the
gate-tempest-dsvm-neutron-full-ubuntu-xenial job with and without
caching of the database connections showed 445 active TCP connections
with caching and 1495 active TCP connections without it [1].

This adds caching of the oslo.db transaction context managers and the
oslo.messaging transports to avoid creating a large number of objects
that are not being garbage-collected as expected.

Closes-Bug: #1691545

[1] https://docs.google.com/spreadsheets/d/1DIfFfX3kaA_SRoCM-aO7BN4IBEShChXLztOBFeKryt4/edit?usp=sharing

 Conflicts:
	nova/test.py
	nova/tests/unit/test_context.py

NOTE(melwitt): Conflicts caused by the fact that no other global cache
resets exist in nova/test.py in Newton and the get_context function
doesn't exist in Newton.

Change-Id: I17e0eb836dd87aac5859f506e7d771d42753d31a
(cherry picked from commit f4159d1755)
This commit is contained in:
melanie witt 2017-05-16 10:25:42 +00:00
parent e195cfe098
commit d6a628da62
4 changed files with 56 additions and 3 deletions

View File

@ -34,6 +34,10 @@ from nova import policy
from nova import utils from nova import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
# TODO(melwitt): This cache should be cleared whenever WSGIService receives a
# SIGHUP and periodically based on an expiration time. Currently, none of the
# cell caches are purged, so neither is this one, for now.
CELL_CACHE = {}
class _ContextAuthPlugin(plugin.BaseAuthPlugin): class _ContextAuthPlugin(plugin.BaseAuthPlugin):
@ -349,11 +353,27 @@ def target_cell(context, cell_mapping):
:param context: The RequestContext to add connection information :param context: The RequestContext to add connection information
:param cell_mapping: A objects.CellMapping object :param cell_mapping: A objects.CellMapping object
""" """
global CELL_CACHE
original_db_connection = context.db_connection original_db_connection = context.db_connection
# avoid circular import # avoid circular import
from nova import db from nova import db
db_connection_string = cell_mapping.database_connection
context.db_connection = db.create_context_manager(db_connection_string) # Synchronize access to the cache by multiple API workers.
@utils.synchronized(cell_mapping.uuid)
def get_or_set_cached_cell_and_set_connections():
try:
cell_db_conn = CELL_CACHE[cell_mapping.uuid]
except KeyError:
db_connection_string = cell_mapping.database_connection
context.db_connection = db.create_context_manager(
db_connection_string)
CELL_CACHE[cell_mapping.uuid] = context.db_connection
else:
context.db_connection = cell_db_conn
get_or_set_cached_cell_and_set_connections()
try: try:
yield context yield context
finally: finally:

View File

@ -211,6 +211,9 @@ class TestCase(testtools.TestCase):
self.useFixture(conf_fixture.ConfFixture(CONF)) self.useFixture(conf_fixture.ConfFixture(CONF))
self.useFixture(nova_fixtures.RPCFixture('nova.test')) self.useFixture(nova_fixtures.RPCFixture('nova.test'))
# NOTE(melwitt): Reset the cached db connection objects
context.CELL_CACHE = {}
if self.USES_DB: if self.USES_DB:
self.useFixture(nova_fixtures.Database()) self.useFixture(nova_fixtures.Database())
self.useFixture(nova_fixtures.Database(database='api')) self.useFixture(nova_fixtures.Database(database='api'))

View File

@ -20,6 +20,7 @@ from nova import context
from nova import exception from nova import exception
from nova import objects from nova import objects
from nova import test from nova import test
from nova.tests import uuidsentinel as uuids
class ContextTestCase(test.NoDBTestCase): class ContextTestCase(test.NoDBTestCase):
@ -286,7 +287,26 @@ class ContextTestCase(test.NoDBTestCase):
roles=['admin', 'weasel']) roles=['admin', 'weasel'])
# Verify the existing db_connection, if any, is restored # Verify the existing db_connection, if any, is restored
ctxt.db_connection = mock.sentinel.db_conn ctxt.db_connection = mock.sentinel.db_conn
mapping = objects.CellMapping(database_connection='fake://') mapping = objects.CellMapping(database_connection='fake://',
transport_url='fake://',
uuid=uuids.cell)
with context.target_cell(ctxt, mapping): with context.target_cell(ctxt, mapping):
self.assertEqual(ctxt.db_connection, mock.sentinel.cm) self.assertEqual(ctxt.db_connection, mock.sentinel.cm)
self.assertEqual(mock.sentinel.db_conn, ctxt.db_connection) self.assertEqual(mock.sentinel.db_conn, ctxt.db_connection)
@mock.patch('nova.db.create_context_manager')
def test_target_cell_caching(self, mock_create_cm):
mock_create_cm.return_value = mock.sentinel.db_conn_obj
ctxt = context.RequestContext('111', '222')
mapping = objects.CellMapping(database_connection='fake://db',
transport_url='fake://mq',
uuid=uuids.cell)
# First call should create new connection objects.
with context.target_cell(ctxt, mapping):
self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection)
mock_create_cm.assert_called_once_with('fake://db')
# Second call should use cached objects.
mock_create_cm.reset_mock()
with context.target_cell(ctxt, mapping):
self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection)
mock_create_cm.assert_not_called()

View File

@ -0,0 +1,10 @@
---
fixes:
- |
Fixes `bug 1691545`_ in which there was a significant increase in database
connections because of the way connections to cell databases were being
established. With this fix, objects related to database connections are
cached in the API service and reused to prevent new connections being
established for every communication with cell databases.
.. _bug 1691545: https://bugs.launchpad.net/nova/+bug/1691545