From d6a628da62f810310ab1bdc2e04222d8010e7b62 Mon Sep 17 00:00:00 2001 From: melanie witt Date: Tue, 16 May 2017 10:25:42 +0000 Subject: [PATCH] Cache database and message queue connection objects Recently in the gate we have seen a trace on some work-in-progress patches: OperationalError: (pymysql.err.OperationalError) (1040, u'Too many connections') and at least one operator has reported that the number of database connections increased significantly going from Mitaka to Newton. It was suspected that the increase was caused by creating new oslo.db transaction context managers on-the-fly when switching database connections for cells. Comparing the dstat --tcp output of runs of the gate-tempest-dsvm-neutron-full-ubuntu-xenial job with and without caching of the database connections showed a difference of 445 active TCP connections and 1495 active TCP connections, respectively [1]. This adds caching of the oslo.db transaction context managers and the oslo.messaging transports to avoid creating a large number of objects that are not being garbage-collected as expected. Closes-Bug: #1691545 [1] https://docs.google.com/spreadsheets/d/1DIfFfX3kaA_SRoCM-aO7BN4IBEShChXLztOBFeKryt4/edit?usp=sharing Conflicts: nova/test.py nova/tests/unit/test_context.py NOTE(melwitt): Conflicts caused by the fact that no other global cache resets exist in nova/test.py in Newton and the get_context function doesn't exist in Newton. 
Change-Id: I17e0eb836dd87aac5859f506e7d771d42753d31a (cherry picked from commit f4159d17552603b90912dba6fe5c604e8d0b8aa7) --- nova/context.py | 24 +++++++++++++++++-- nova/test.py | 3 +++ nova/tests/unit/test_context.py | 22 ++++++++++++++++- .../notes/bug-1691545-1acd6512effbdffb.yaml | 10 ++++++++ 4 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml diff --git a/nova/context.py b/nova/context.py index 5d464f034f18..7cacad3230ca 100644 --- a/nova/context.py +++ b/nova/context.py @@ -34,6 +34,10 @@ from nova import policy from nova import utils LOG = logging.getLogger(__name__) +# TODO(melwitt): This cache should be cleared whenever WSGIService receives a +# SIGHUP and periodically based on an expiration time. Currently, none of the +# cell caches are purged, so neither is this one, for now. +CELL_CACHE = {} class _ContextAuthPlugin(plugin.BaseAuthPlugin): @@ -349,11 +353,27 @@ def target_cell(context, cell_mapping): :param context: The RequestContext to add connection information :param cell_mapping: A objects.CellMapping object """ + global CELL_CACHE + original_db_connection = context.db_connection # avoid circular import from nova import db - db_connection_string = cell_mapping.database_connection - context.db_connection = db.create_context_manager(db_connection_string) + + # Synchronize access to the cache by multiple API workers. 
+ @utils.synchronized(cell_mapping.uuid) + def get_or_set_cached_cell_and_set_connections(): + try: + cell_db_conn = CELL_CACHE[cell_mapping.uuid] + except KeyError: + db_connection_string = cell_mapping.database_connection + context.db_connection = db.create_context_manager( + db_connection_string) + CELL_CACHE[cell_mapping.uuid] = context.db_connection + else: + context.db_connection = cell_db_conn + + get_or_set_cached_cell_and_set_connections() + try: yield context finally: diff --git a/nova/test.py b/nova/test.py index caf1f89b8f84..7ad78b2c4dc8 100644 --- a/nova/test.py +++ b/nova/test.py @@ -211,6 +211,9 @@ class TestCase(testtools.TestCase): self.useFixture(conf_fixture.ConfFixture(CONF)) self.useFixture(nova_fixtures.RPCFixture('nova.test')) + # NOTE(melwitt): Reset the cached db connection objects + context.CELL_CACHE = {} + if self.USES_DB: self.useFixture(nova_fixtures.Database()) self.useFixture(nova_fixtures.Database(database='api')) diff --git a/nova/tests/unit/test_context.py b/nova/tests/unit/test_context.py index f406ccf5c0e3..f48021632fe3 100644 --- a/nova/tests/unit/test_context.py +++ b/nova/tests/unit/test_context.py @@ -20,6 +20,7 @@ from nova import context from nova import exception from nova import objects from nova import test +from nova.tests import uuidsentinel as uuids class ContextTestCase(test.NoDBTestCase): @@ -286,7 +287,26 @@ class ContextTestCase(test.NoDBTestCase): roles=['admin', 'weasel']) # Verify the existing db_connection, if any, is restored ctxt.db_connection = mock.sentinel.db_conn - mapping = objects.CellMapping(database_connection='fake://') + mapping = objects.CellMapping(database_connection='fake://', + transport_url='fake://', + uuid=uuids.cell) with context.target_cell(ctxt, mapping): self.assertEqual(ctxt.db_connection, mock.sentinel.cm) self.assertEqual(mock.sentinel.db_conn, ctxt.db_connection) + + @mock.patch('nova.db.create_context_manager') + def test_target_cell_caching(self, mock_create_cm): + 
mock_create_cm.return_value = mock.sentinel.db_conn_obj + ctxt = context.RequestContext('111', '222') + mapping = objects.CellMapping(database_connection='fake://db', + transport_url='fake://mq', + uuid=uuids.cell) + # First call should create new connection objects. + with context.target_cell(ctxt, mapping): + self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection) + mock_create_cm.assert_called_once_with('fake://db') + # Second call should use cached objects. + mock_create_cm.reset_mock() + with context.target_cell(ctxt, mapping): + self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection) + mock_create_cm.assert_not_called() diff --git a/releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml b/releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml new file mode 100644 index 000000000000..e4a0bf4f9048 --- /dev/null +++ b/releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Fixes `bug 1691545`_ in which there was a significant increase in database + connections because of the way connections to cell databases were being + established. With this fix, objects related to database connections are + cached in the API service and reused to prevent new connections being + established for every communication with cell databases. + + .. _bug 1691545: https://bugs.launchpad.net/nova/+bug/1691545