Retool sqlite retries
... so that a retry is neither attempted instantly nor waited on forever. The maximum delay is now the setting that actually causes the retry attempts to stop, and it defaults to only 10 seconds, with a fairly tight interval for retries to occur within. This change also does not abort retries for releasing a node lock or updating a node: if either action halts during the close-out of a task, the result can be catastrophic to the underlying operation and state. Internal actions around locking likewise cannot be retried with a long interval, otherwise things break in very bad ways. Change-Id: I2041e90bb0f7f522bde4338eceda97f0ae8b2c35
This commit is contained in:
parent
8e2aab8291
commit
4ba418716e
@ -26,9 +26,12 @@ opts = [
|
||||
help=_('If SQLite database operation retry logic is enabled '
|
||||
'or not. Enabled by default.')),
|
||||
cfg.IntOpt('sqlite_max_wait_for_retry',
|
||||
default=30,
|
||||
default=10,
|
||||
help=_('Maximum number of seconds to retry SQLite database '
|
||||
'locks.')),
|
||||
'locks, after which the original exception will be '
|
||||
'returned to the caller. This does not presently apply '
|
||||
'to internal node lock release actions and DB actions '
|
||||
'centered around the completion of tasks.')),
|
||||
]
|
||||
|
||||
|
||||
|
@ -70,10 +70,22 @@ def wrap_sqlite_retry(f):
|
||||
|
||||
@functools.wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
if (CONF.database.sqlite_retries
|
||||
and not utils.is_ironic_using_sqlite()):
|
||||
if (not CONF.database.sqlite_retries
|
||||
or not utils.is_ironic_using_sqlite()):
|
||||
return f(*args, **kwargs)
|
||||
else:
|
||||
# NOTE(TheJulia): We likely need to see if we can separate
|
||||
# update_node in API from the final actions of task manager
|
||||
# actions, but that would also be an internal API change
|
||||
# because we would likely need a special object method to
|
||||
# call for update_node to delineate an internal save versus
|
||||
# an external save.
|
||||
if f.__name__ in ['update_node', 'release_node']:
|
||||
stop = tenacity.stop_never
|
||||
else:
|
||||
stop = tenacity.stop_after_delay(
|
||||
max_delay=CONF.database.sqlite_max_wait_for_retry
|
||||
)
|
||||
for attempt in tenacity.Retrying(
|
||||
retry=(
|
||||
tenacity.retry_if_exception_type(
|
||||
@ -81,13 +93,16 @@ def wrap_sqlite_retry(f):
|
||||
& tenacity.retry_if_exception(
|
||||
lambda e: 'database is locked' in str(e))
|
||||
),
|
||||
wait=tenacity.wait_full_jitter(
|
||||
multiplier=0.25,
|
||||
max=CONF.database.sqlite_max_wait_for_retry),
|
||||
wait=tenacity.wait_random(
|
||||
min=0.1,
|
||||
max=1,
|
||||
),
|
||||
before_sleep=(
|
||||
tenacity.before_sleep_log(LOG, logging.DEBUG)
|
||||
),
|
||||
reraise=True):
|
||||
stop=stop,
|
||||
reraise=False,
|
||||
retry_error_cls=exception.TemporaryFailure):
|
||||
with attempt:
|
||||
return f(*args, **kwargs)
|
||||
return wrapper
|
||||
|
@ -40,7 +40,7 @@ jsonpatch!=1.20,>=1.16 # BSD
|
||||
Jinja2>=3.0.0 # BSD License (3 clause)
|
||||
keystonemiddleware>=9.5.0 # Apache-2.0
|
||||
oslo.messaging>=14.1.0 # Apache-2.0
|
||||
tenacity>=6.2.0 # Apache-2.0
|
||||
tenacity>=6.3.1 # Apache-2.0
|
||||
oslo.versionedobjects>=1.31.2 # Apache-2.0
|
||||
jsonschema>=3.2.0 # MIT
|
||||
psutil>=3.2.2 # BSD
|
||||
|
Loading…
Reference in New Issue
Block a user