Merge "Reset the cell cache for database access in Service" into stable/rocky
This commit is contained in:
commit
3cb5c1512e
|
@ -38,9 +38,6 @@ from nova import policy
|
||||||
from nova import utils
|
from nova import utils
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
# TODO(melwitt): This cache should be cleared whenever WSGIService receives a
|
|
||||||
# SIGHUP and periodically based on an expiration time. Currently, none of the
|
|
||||||
# cell caches are purged, so neither is this one, for now.
|
|
||||||
CELL_CACHE = {}
|
CELL_CACHE = {}
|
||||||
# NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out
|
# NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out
|
||||||
# waiting for a result from a cell.
|
# waiting for a result from a cell.
|
||||||
|
|
|
@ -153,6 +153,20 @@ class Service(service.Service):
|
||||||
This includes starting an RPC service, initializing
|
This includes starting an RPC service, initializing
|
||||||
periodic tasks, etc.
|
periodic tasks, etc.
|
||||||
"""
|
"""
|
||||||
|
# NOTE(melwitt): Clear the cell cache holding database transaction
|
||||||
|
# context manager objects. We do this to ensure we create new internal
|
||||||
|
# oslo.db locks to avoid a situation where a child process receives an
|
||||||
|
# already locked oslo.db lock when it is forked. When a child process
|
||||||
|
# inherits a locked oslo.db lock, database accesses through that
|
||||||
|
# transaction context manager will never be able to acquire the lock
|
||||||
|
# and requests will fail with CellTimeout errors.
|
||||||
|
# See https://bugs.python.org/issue6721 for more information.
|
||||||
|
# With python 3.7, it would be possible for oslo.db to make use of the
|
||||||
|
# os.register_at_fork() method to reinitialize its lock. Until we
|
||||||
|
# require python 3.7 as a mininum version, we must handle the situation
|
||||||
|
# outside of oslo.db.
|
||||||
|
context.CELL_CACHE = {}
|
||||||
|
|
||||||
assert_eventlet_uses_monotonic_clock()
|
assert_eventlet_uses_monotonic_clock()
|
||||||
|
|
||||||
verstr = version.version_string_with_package()
|
verstr = version.version_string_with_package()
|
||||||
|
@ -308,6 +322,8 @@ class Service(service.Service):
|
||||||
def reset(self):
|
def reset(self):
|
||||||
"""reset the service."""
|
"""reset the service."""
|
||||||
self.manager.reset()
|
self.manager.reset()
|
||||||
|
# Reset the cell cache that holds database transaction context managers
|
||||||
|
context.CELL_CACHE = {}
|
||||||
|
|
||||||
|
|
||||||
class WSGIService(service.Service):
|
class WSGIService(service.Service):
|
||||||
|
@ -362,13 +378,18 @@ class WSGIService(service.Service):
|
||||||
setup_profiler(name, self.host)
|
setup_profiler(name, self.host)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
"""Reset server greenpool size to default and service version cache.
|
"""Reset the following:
|
||||||
|
|
||||||
|
* server greenpool size to default
|
||||||
|
* service version cache
|
||||||
|
* cell cache holding database transaction context managers
|
||||||
|
|
||||||
:returns: None
|
:returns: None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.server.reset()
|
self.server.reset()
|
||||||
service_obj.Service.clear_min_version_cache()
|
service_obj.Service.clear_min_version_cache()
|
||||||
|
context.CELL_CACHE = {}
|
||||||
|
|
||||||
def _get_manager(self):
|
def _get_manager(self):
|
||||||
"""Initialize a Manager object appropriate for this service.
|
"""Initialize a Manager object appropriate for this service.
|
||||||
|
@ -396,6 +417,20 @@ class WSGIService(service.Service):
|
||||||
:returns: None
|
:returns: None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# NOTE(melwitt): Clear the cell cache holding database transaction
|
||||||
|
# context manager objects. We do this to ensure we create new internal
|
||||||
|
# oslo.db locks to avoid a situation where a child process receives an
|
||||||
|
# already locked oslo.db lock when it is forked. When a child process
|
||||||
|
# inherits a locked oslo.db lock, database accesses through that
|
||||||
|
# transaction context manager will never be able to acquire the lock
|
||||||
|
# and requests will fail with CellTimeout errors.
|
||||||
|
# See https://bugs.python.org/issue6721 for more information.
|
||||||
|
# With python 3.7, it would be possible for oslo.db to make use of the
|
||||||
|
# os.register_at_fork() method to reinitialize its lock. Until we
|
||||||
|
# require python 3.7 as a mininum version, we must handle the situation
|
||||||
|
# outside of oslo.db.
|
||||||
|
context.CELL_CACHE = {}
|
||||||
|
|
||||||
ctxt = context.get_admin_context()
|
ctxt = context.get_admin_context()
|
||||||
service_ref = objects.Service.get_by_host_and_binary(ctxt, self.host,
|
service_ref = objects.Service.get_by_host_and_binary(ctxt, self.host,
|
||||||
self.binary)
|
self.binary)
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
from nova import context as nova_context
|
||||||
|
from nova import test
|
||||||
|
from nova.tests import fixtures as nova_fixtures
|
||||||
|
from nova.tests.functional import integrated_helpers
|
||||||
|
from nova.tests.unit.image import fake as fake_image
|
||||||
|
from nova.tests.unit import policy_fixture
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceTestCase(test.TestCase,
|
||||||
|
integrated_helpers.InstanceHelperMixin):
|
||||||
|
"""Contains scenarios for testing services.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super(ServiceTestCase, self).setUp()
|
||||||
|
# Use the standard fixtures.
|
||||||
|
self.useFixture(policy_fixture.RealPolicyFixture())
|
||||||
|
self.useFixture(nova_fixtures.NeutronFixture(self))
|
||||||
|
self.useFixture(nova_fixtures.PlacementFixture())
|
||||||
|
fake_image.stub_out_image_service(self)
|
||||||
|
self.addCleanup(fake_image.FakeImageService_reset)
|
||||||
|
# Start nova controller services.
|
||||||
|
self.api = self.useFixture(nova_fixtures.OSAPIFixture(
|
||||||
|
api_version='v2.1')).api
|
||||||
|
self.start_service('conductor')
|
||||||
|
self.scheduler = self.start_service('scheduler')
|
||||||
|
# Our OSAPIFixture does not use a WSGIService, so just use the metadata
|
||||||
|
# server fixture (which uses WSGIService) for testing.
|
||||||
|
self.metadata = self.useFixture(
|
||||||
|
nova_fixtures.OSMetadataServer()).metadata
|
||||||
|
# Start one compute service.
|
||||||
|
self.start_service('compute')
|
||||||
|
|
||||||
|
def test_service_reset_resets_cell_cache(self):
|
||||||
|
"""Tests that the cell cache for database transaction context managers
|
||||||
|
is cleared after a service reset (example scenario: SIGHUP).
|
||||||
|
"""
|
||||||
|
server_req = self._build_minimal_create_server_request(
|
||||||
|
self.api, 'test-service-reset')
|
||||||
|
server = self.api.post_server({'server': server_req})
|
||||||
|
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||||
|
# Cell cache should be populated after creating a server.
|
||||||
|
self.assertTrue(nova_context.CELL_CACHE)
|
||||||
|
self.scheduler.reset()
|
||||||
|
# Cell cache should be empty after the service reset.
|
||||||
|
self.assertEqual({}, nova_context.CELL_CACHE)
|
||||||
|
|
||||||
|
# Now test the WSGI service.
|
||||||
|
server = self.api.post_server({'server': server_req})
|
||||||
|
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||||
|
# Cell cache should be populated after creating a server.
|
||||||
|
self.assertTrue(nova_context.CELL_CACHE)
|
||||||
|
self.metadata.reset()
|
||||||
|
# Cell cache should be empty after the service reset.
|
||||||
|
self.assertEqual({}, nova_context.CELL_CACHE)
|
||||||
|
|
||||||
|
def test_service_start_resets_cell_cache(self):
|
||||||
|
"""Tests that the cell cache for database transaction context managers
|
||||||
|
is cleared upon a service start (example scenario: service start after
|
||||||
|
a SIGTERM and the parent process forks child process workers).
|
||||||
|
"""
|
||||||
|
server_req = self._build_minimal_create_server_request(
|
||||||
|
self.api, 'test-service-start')
|
||||||
|
server = self.api.post_server({'server': server_req})
|
||||||
|
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||||
|
# Cell cache should be populated after creating a server.
|
||||||
|
self.assertTrue(nova_context.CELL_CACHE)
|
||||||
|
self.scheduler.stop()
|
||||||
|
# NOTE(melwitt): Simulate a service starting after being stopped. The
|
||||||
|
# scenario we want to handle is one where during start, the parent
|
||||||
|
# process forks child process workers while one or more of its cached
|
||||||
|
# database transaction context managers is inside a locked code
|
||||||
|
# section. If the child processes are forked while the lock is locked,
|
||||||
|
# the child processes begin with an already locked lock that can never
|
||||||
|
# be acquired again. The result is that requests gets stuck and fail
|
||||||
|
# with a CellTimeout error.
|
||||||
|
self.scheduler.start()
|
||||||
|
# Cell cache should be empty after the service start.
|
||||||
|
self.assertEqual({}, nova_context.CELL_CACHE)
|
||||||
|
|
||||||
|
# Now test the WSGI service.
|
||||||
|
server = self.api.post_server({'server': server_req})
|
||||||
|
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||||
|
# Cell cache should be populated after creating a server.
|
||||||
|
self.assertTrue(nova_context.CELL_CACHE)
|
||||||
|
self.metadata.stop()
|
||||||
|
self.metadata.start()
|
||||||
|
# Cell cache should be empty after the service reset.
|
||||||
|
self.assertEqual({}, nova_context.CELL_CACHE)
|
Loading…
Reference in New Issue