Fix target_cell usage for scatter_gather_cells

In the set_target_cell method, we synchronize access to the cached cell
database transaction context manager objects to prevent more than one
thread from using a cell's transaction context manager at the same
time.

In scatter_gather_cells, we're calling target_cell in such a way that
the lock is acquired and released BEFORE the green thread actually
accesses the database in the spawned function. So multiple threads can
access a cell's database transaction context manager and it's possible
for a database transaction to fail with the error:

  TypeError: Can't upgrade a READER transaction to a WRITER
             mid-transaction

because the in-scope transaction context might be in the middle of a
read when a concurrent green thread tries to do a write.

Closes-Bug: #1722404

Change-Id: I07dd4d5aebdc82e343ec2035dc94c744e4754c96
This commit is contained in:
melanie witt 2017-10-09 20:36:59 +00:00
parent c3ca8a6256
commit 3cc3cc453d
2 changed files with 23 additions and 8 deletions

View File

@ -419,9 +419,11 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
queue = eventlet.queue.LightQueue()
results = {}
def gather_result(cell_uuid, fn, *args, **kwargs):
def gather_result(cell_mapping, fn, context, *args, **kwargs):
cell_uuid = cell_mapping.uuid
try:
result = fn(*args, **kwargs)
with target_cell(context, cell_mapping) as cctxt:
result = fn(cctxt, *args, **kwargs)
except Exception:
LOG.exception('Error gathering result from cell %s', cell_uuid)
result = raised_exception_sentinel
@ -429,10 +431,9 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
queue.put((cell_uuid, result))
for cell_mapping in cell_mappings:
with target_cell(context, cell_mapping) as cctxt:
greenthreads.append((cell_mapping.uuid,
utils.spawn(gather_result, cell_mapping.uuid,
fn, cctxt, *args, **kwargs)))
greenthreads.append((cell_mapping.uuid,
utils.spawn(gather_result, cell_mapping,
fn, context, *args, **kwargs)))
with eventlet.timeout.Timeout(timeout, exception.CellTimeout):
try:

View File

@ -307,21 +307,35 @@ class ContextTestCase(test.NoDBTestCase):
@mock.patch('nova.context.target_cell')
@mock.patch('nova.objects.InstanceList.get_by_filters')
def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell):
self.useFixture(nova_fixtures.SpawnIsSynchronousFixture())
ctxt = context.get_context()
mapping = objects.CellMapping(database_connection='fake://db',
transport_url='fake://mq',
uuid=uuids.cell)
mappings = objects.CellMappingList(objects=[mapping])
# Use a mock manager to assert call order across mocks.
manager = mock.Mock()
manager.attach_mock(mock_get_inst, 'get_inst')
manager.attach_mock(mock_target_cell, 'target_cell')
filters = {'deleted': False}
context.scatter_gather_cells(
ctxt, mappings, 60, objects.InstanceList.get_by_filters, filters,
sort_dir='foo')
mock_get_inst.assert_called_once_with(
# NOTE(melwitt): This only works without the SpawnIsSynchronous fixture
# because when the spawn is treated as synchronous and the thread
# function is called immediately, it will occur inside the target_cell
# context manager scope when it wouldn't with a real spawn.
# Assert that InstanceList.get_by_filters was called before the
# target_cell context manager exited.
get_inst_call = mock.call.get_inst(
mock_target_cell.return_value.__enter__.return_value, filters,
sort_dir='foo')
expected_calls = [get_inst_call,
mock.call.target_cell().__exit__(None, None, None)]
manager.assert_has_calls(expected_calls)
@mock.patch('nova.context.LOG.warning')
@mock.patch('eventlet.timeout.Timeout')