Allow retries when resource acquires lock
Previously, if an update or delete on a resource is initiated concurrently with another action (e.g. a metadata update as the result of a resource signal) it may end up failing with exception.UpdateInProgress because its view of the resource's atomic_key is stale. Now, we retry, rereading the resource's atomic_key from the db if needed, up to cfg.CONF.action_retry_limit times. Change-Id: I4cfa6f691fe916c0d605a712028b88f61ebab4d9 Partial-Bug: #1675286
This commit is contained in:
parent
c2995c73df
commit
2ec2d5a973
@ -15,6 +15,7 @@ import base64
|
|||||||
import contextlib
|
import contextlib
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
import pydoc
|
import pydoc
|
||||||
|
import tenacity
|
||||||
import weakref
|
import weakref
|
||||||
|
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
@ -772,14 +773,35 @@ class Resource(status.ResourceStatus):
|
|||||||
Expected exceptions are re-raised, with the Resource moved to the
|
Expected exceptions are re-raised, with the Resource moved to the
|
||||||
COMPLETE state.
|
COMPLETE state.
|
||||||
"""
|
"""
|
||||||
|
attempts = 1
|
||||||
|
first_iter = [True] # work around no nonlocal in py27
|
||||||
if self.stack.convergence:
|
if self.stack.convergence:
|
||||||
lock_acquire = self.LOCK_ACQUIRE
|
lock_acquire = self.LOCK_ACQUIRE
|
||||||
lock_release = self.LOCK_RELEASE
|
lock_release = self.LOCK_RELEASE
|
||||||
|
if action != self.CREATE:
|
||||||
|
attempts += max(cfg.CONF.client_retry_limit, 0)
|
||||||
else:
|
else:
|
||||||
lock_acquire = lock_release = self.LOCK_NONE
|
lock_acquire = lock_release = self.LOCK_NONE
|
||||||
|
|
||||||
try:
|
# retry for convergence DELETE or UPDATE if we get the usual
|
||||||
|
# lock-acquire exception of exception.UpdateInProgress
|
||||||
|
@tenacity.retry(
|
||||||
|
stop=tenacity.stop_after_attempt(attempts),
|
||||||
|
retry=tenacity.retry_if_exception_type(
|
||||||
|
exception.UpdateInProgress),
|
||||||
|
wait=tenacity.wait_random(max=2),
|
||||||
|
reraise=True)
|
||||||
|
def set_in_progress():
|
||||||
|
if not first_iter[0]:
|
||||||
|
res_obj = resource_objects.Resource.get_obj(
|
||||||
|
self.context, self.id)
|
||||||
|
self._atomic_key = res_obj.atomic_key
|
||||||
|
else:
|
||||||
|
first_iter[0] = False
|
||||||
self.state_set(action, self.IN_PROGRESS, lock=lock_acquire)
|
self.state_set(action, self.IN_PROGRESS, lock=lock_acquire)
|
||||||
|
|
||||||
|
try:
|
||||||
|
set_in_progress()
|
||||||
yield
|
yield
|
||||||
except exception.UpdateInProgress as ex:
|
except exception.UpdateInProgress as ex:
|
||||||
with excutils.save_and_reraise_exception():
|
with excutils.save_and_reraise_exception():
|
||||||
|
@ -55,8 +55,3 @@ class AodhAlarmTest(scenario_base.ScenarioTestsBase):
|
|||||||
# Note: there is little point waiting more than 60s+time to scale up.
|
# Note: there is little point waiting more than 60s+time to scale up.
|
||||||
self.assertTrue(test.call_until_true(
|
self.assertTrue(test.call_until_true(
|
||||||
120, 2, self.check_instance_count, stack_identifier, 2))
|
120, 2, self.check_instance_count, stack_identifier, 2))
|
||||||
|
|
||||||
# Temporarily avoids a race condition, addressed in the
|
|
||||||
# next change https://review.openstack.org/#/c/449351/
|
|
||||||
import time
|
|
||||||
time.sleep(3)
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user