Merge "Add allocation_conflict_retry_count conf setting"

Zuul 2019-11-14 21:07:05 +00:00 committed by Gerrit Code Review
commit b87073c991
5 changed files with 61 additions and 32 deletions

View File

@@ -72,6 +72,14 @@ a project or user identifier for the consumer. In cleaning up the data
modeling, we no longer allow missing project and user information. If an older
client makes an allocation, we'll use this in place of the information it
doesn't provide.
"""),
    cfg.IntOpt(
        'allocation_conflict_retry_count',
        default=10,
        help="""
The number of times to retry, server-side, writing allocations when there is
a resource provider generation conflict. Raising this value may be useful
when many concurrent allocations to the same resource provider are expected.
"""),
]
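A deployment that routinely sees heavy concurrent allocation traffic against the same resource provider can raise this limit in placement.conf. A minimal illustration (the value 20 is arbitrary; the shipped default is 10):

[placement]
allocation_conflict_retry_count = 20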

View File

@@ -35,10 +35,6 @@ _USER_TBL = models.User.__table__
LOG = logging.getLogger(__name__)
# The number of times to retry set_allocations if there has
# been a resource provider (not consumer) generation conflict.
RP_CONFLICT_RETRY_COUNT = 10
class Allocation(object):
@@ -499,7 +495,7 @@ def replace_all(context, alloc_list):
# and try again. For sake of simplicity (and because we don't have
# easy access to the information) we reload all the resource
# providers that may be present.
retries = RP_CONFLICT_RETRY_COUNT
retries = context.config.placement.allocation_conflict_retry_count
while retries:
    retries -= 1
    try:
@@ -526,7 +522,7 @@ def replace_all(context, alloc_list):
# information from the allocations is not coherent as this
# could be multiple consumers and providers.
LOG.warning('Exceeded retry limit of %d on allocations write',
            RP_CONFLICT_RETRY_COUNT)
            context.config.placement.allocation_conflict_retry_count)
raise exception.ResourceProviderConcurrentUpdateDetected()
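The surrounding logic is a simple bounded retry: decrement a counter, attempt the write, and on a provider generation conflict try again until the counter is exhausted, at which point the warning above is logged and the conflict exception is raised to the caller. A rough, self-contained sketch of that shape, using generic names rather than the actual placement internals:

import logging

LOG = logging.getLogger(__name__)


class GenerationConflict(Exception):
    """Stand-in for ResourceProviderConcurrentUpdateDetected."""


def write_with_retries(write_once, retry_count):
    # Attempt the write until it succeeds or the retry budget is spent.
    retries = retry_count
    while retries:
        retries -= 1
        try:
            return write_once()
        except GenerationConflict:
            LOG.debug('Retrying allocations write on resource provider '
                      'generation conflict')
    LOG.warning('Exceeded retry limit of %d on allocations write',
                retry_count)
    raise GenerationConflict()

A retry count of 0 means the loop body never runs at all, which is what the first test case below exploits to force the conflict exception immediately.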

View File

@@ -624,40 +624,42 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
]
# Make sure the right exception happens when the retry loop expires.
with mock.patch.object(alloc_obj, 'RP_CONFLICT_RETRY_COUNT', 0):
    self.assertRaises(
        exception.ResourceProviderConcurrentUpdateDetected,
        alloc_obj.replace_all, self.ctx, alloc_list)
    mock_log.warning.assert_called_with(
        'Exceeded retry limit of %d on allocations write', 0)
self.conf_fixture.config(allocation_conflict_retry_count=0,
                         group='placement')
self.assertRaises(
    exception.ResourceProviderConcurrentUpdateDetected,
    alloc_obj.replace_all, self.ctx, alloc_list)
mock_log.warning.assert_called_with(
    'Exceeded retry limit of %d on allocations write', 0)
# Make sure the right thing happens after a small number of failures.
# There's a bit of mock magic going on here to ensure that we can
# both do some side effects on _set_allocations as well as have the
# real behavior. Two generation conflicts and then a success.
mock_log.reset_mock()
with mock.patch.object(alloc_obj, 'RP_CONFLICT_RETRY_COUNT', 3):
    unmocked_set = alloc_obj._set_allocations
    with mock.patch('placement.objects.allocation.'
                    '_set_allocations') as mock_set:
        exceptions = iter([
            exception.ResourceProviderConcurrentUpdateDetected(),
            exception.ResourceProviderConcurrentUpdateDetected(),
        ])
self.conf_fixture.config(allocation_conflict_retry_count=3,
                         group='placement')
unmocked_set = alloc_obj._set_allocations
with mock.patch('placement.objects.allocation.'
                '_set_allocations') as mock_set:
    exceptions = iter([
        exception.ResourceProviderConcurrentUpdateDetected(),
        exception.ResourceProviderConcurrentUpdateDetected(),
    ])
        def side_effect(*args, **kwargs):
            try:
                raise next(exceptions)
            except StopIteration:
                return unmocked_set(*args, **kwargs)
    def side_effect(*args, **kwargs):
        try:
            raise next(exceptions)
        except StopIteration:
            return unmocked_set(*args, **kwargs)
        mock_set.side_effect = side_effect
        alloc_obj.replace_all(self.ctx, alloc_list)
        self.assertEqual(2, mock_log.debug.call_count)
        mock_log.debug.called_with(
            'Retrying allocations write on resource provider '
            'generation conflict')
        self.assertEqual(3, mock_set.call_count)
    mock_set.side_effect = side_effect
    alloc_obj.replace_all(self.ctx, alloc_list)
    self.assertEqual(2, mock_log.debug.call_count)
    mock_log.debug.called_with(
        'Retrying allocations write on resource provider '
        'generation conflict')
    self.assertEqual(3, mock_set.call_count)
# Confirm we're using a different rp object after the change
# and that it has a higher generation.

View File

@@ -79,6 +79,11 @@ class APIFixture(fixture.GabbiFixture):
self.placement_db_fixture.setUp()
self.context = context.RequestContext()
# Some database interaction methods require access to the oslo config
# via the context. Within the WSGI application this is taken care of,
# but here in the fixtures we use some of those methods to create
# entities.
self.context.config = self.conf_fixture.conf
# Set default policy opts, otherwise the deploy module can raise
# NoSuchOptError.
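With the config attached, code that receives this context can read placement options through the same path the running service uses. Purely as an illustration, assuming the placement options are already registered on the fixture's conf:

# Group first, then option name, as the allocation code does above.
retry_count = self.context.config.placement.allocation_conflict_retry_count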

View File

@@ -0,0 +1,18 @@
---
fixes:
- |
When a single resource provider receives many concurrent allocation writes,
retries may be performed server side when there is a resource provider
generation conflict. When those retries are all consumed, the client
receives an HTTP 409 response and may choose to try the request again.
In an environment where high levels of concurrent allocation writes are
common, such as a busy clustered hypervisor, the default retry count may be
too low. See story 2006467_.

A new configuration setting,
``[placement]/allocation_conflict_retry_count``, has been added to address
this situation. It defines the number of times to retry, server-side,
writing allocations when there is a resource provider generation conflict.

.. _2006467: https://storyboard.openstack.org/#!/story/2006467