Make config parameter names for RetryHelper more generic
Initially RetryHelper was exclusively used to retry change updates in NoteDb. However since change I8c16487780 we use RetryHelper also to retry non-NoteDb operations, e.g. index queries. This means RetryHelper is no longer specific to NoteDb updates and hence its configuration parameters shouldn't be part of the noteDb configuration. Hence rename notedb.retryMaxWait to retry.maxWait and notedb.retryTimeout to retry.timeout. Since it may make sense to have a different retry timeout for certain operation types it is possible to overwrite retry.timeout by operation type by setting notedb.<operationType>.retryTimeout. Change-Id: I1a869b0f7d593a3145848db74317fd97483265d1 Signed-off-by: Edwin Kempin <ekempin@google.com>
This commit is contained in:
@@ -3437,26 +3437,6 @@ each process retrieves at once.
|
||||
+
|
||||
By default, 1.
|
||||
|
||||
[[noteDb.retryMaxWait]]noteDb.retryMaxWait::
|
||||
+
|
||||
Maximum time to wait between attempts to retry update operations when one
|
||||
attempt fails due to contention (aka lock failure) on the underlying ref
|
||||
storage. Operations are retried with exponential backoff, plus some random
|
||||
jitter, until the interval reaches this limit. After that, retries continue to
|
||||
occur after a fixed timeout (plus jitter), up to
|
||||
link:#noteDb.retryTimeout[`noteDb.retryTimeout`].
|
||||
+
|
||||
Defaults to 5 seconds; unit suffixes are supported, and assumes milliseconds if
|
||||
not specified.
|
||||
|
||||
[[noteDb.retryTimeout]]noteDb.retryTimeout::
|
||||
+
|
||||
Total timeout for retrying update operations when one attempt fails due to
|
||||
contention (aka lock failure) on the underlying ref storage.
|
||||
+
|
||||
Defaults to 20 seconds; unit suffixes are supported, and assumes milliseconds if
|
||||
not specified.
|
||||
|
||||
|
||||
[[oauth]]
|
||||
=== Section oauth
|
||||
@@ -3780,6 +3760,40 @@ A name of a group which exists in the database. Zero, one or many
|
||||
groups are allowed. Each on its own line. Groups which don't exist
|
||||
in the database are ignored.
|
||||
|
||||
[[retry]]
|
||||
=== Section retry
|
||||
|
||||
[[retry.maxWait]]retry.maxWait::
|
||||
+
|
||||
Maximum time to wait between attempts to retry an operations when one attempt
|
||||
fails (e.g. on NoteDb updates due to contention, aka lock failure, on the
|
||||
underlying ref storage). Operations are retried with exponential backoff, plus
|
||||
some random jitter, until the interval reaches this limit. After that, retries
|
||||
continue to occur after a fixed timeout (plus jitter), up to
|
||||
link:#retry.timeout[`retry.timeout`].
|
||||
+
|
||||
Defaults to 5 seconds; unit suffixes are supported, and assumes milliseconds if
|
||||
not specified.
|
||||
|
||||
[[retry.timeout]]retry.timeout::
|
||||
+
|
||||
Total timeout for retrying operations when one attempt fails.
|
||||
+
|
||||
It is possible to overwrite this default timeout based on operation types by
|
||||
setting link:#retry.operationType.timeout[`retry.<operationType>.timeout`].
|
||||
+
|
||||
Defaults to 20 seconds; unit suffixes are supported, and assumes milliseconds if
|
||||
not specified.
|
||||
|
||||
[[retry.operationType.timeout]]retry.<operationType>.timeout::
|
||||
+
|
||||
Total timeout for retrying operations of type `<operationType>` when one
|
||||
attempt fails. `<operationType>` can be `ACCOUNT_UPDATE`, `CHANGE_UPDATE`,
|
||||
`GROUP_UPDATE` and `INDEX_QUERY`.
|
||||
+
|
||||
Defaults to link:#retry.timeout[`retry.timeout`]; unit suffixes are supported,
|
||||
and assumes milliseconds if not specified.
|
||||
|
||||
[[rules]]
|
||||
=== Section rules
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ import com.google.gerrit.server.update.BatchUpdate;
|
||||
import com.google.gerrit.server.update.BatchUpdateOp;
|
||||
import com.google.gerrit.server.update.ChangeContext;
|
||||
import com.google.gerrit.server.update.RetryHelper;
|
||||
import com.google.gerrit.server.update.RetryHelper.ActionType;
|
||||
import com.google.gerrit.server.update.UpdateException;
|
||||
import com.google.gerrit.server.util.RequestId;
|
||||
import com.google.gwtorm.server.OrmException;
|
||||
@@ -489,7 +490,10 @@ public class MergeOp implements AutoCloseable {
|
||||
.listener(retryTracker)
|
||||
// Up to the entire submit operation is retried, including possibly many projects.
|
||||
// Multiply the timeout by the number of projects we're actually attempting to submit.
|
||||
.timeout(retryHelper.getDefaultTimeout().multipliedBy(cs.projects().size()))
|
||||
.timeout(
|
||||
retryHelper
|
||||
.getDefaultTimeout(ActionType.CHANGE_UPDATE)
|
||||
.multipliedBy(cs.projects().size()))
|
||||
.build());
|
||||
|
||||
if (projects > 1) {
|
||||
|
||||
@@ -2954,7 +2954,9 @@ class ReceiveCommits {
|
||||
},
|
||||
// Use a multiple of the default timeout to account for inner retries that may otherwise
|
||||
// eat up the whole timeout so that no time is left to retry this outer action.
|
||||
RetryHelper.options().timeout(retryHelper.getDefaultTimeout().multipliedBy(5)).build());
|
||||
RetryHelper.options()
|
||||
.timeout(retryHelper.getDefaultTimeout(ActionType.CHANGE_UPDATE).multipliedBy(5))
|
||||
.build());
|
||||
} catch (RestApiException e) {
|
||||
logError("Can't insert patchset", e);
|
||||
} catch (UpdateException e) {
|
||||
|
||||
@@ -30,6 +30,7 @@ import com.google.auto.value.AutoValue;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.gerrit.common.Nullable;
|
||||
import com.google.gerrit.extensions.restapi.RestApiException;
|
||||
import com.google.gerrit.metrics.Counter1;
|
||||
@@ -43,6 +44,8 @@ import com.google.gerrit.server.notedb.NotesMigration;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import java.time.Duration;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.function.Consumer;
|
||||
import org.eclipse.jgit.lib.Config;
|
||||
@@ -138,7 +141,7 @@ public class RetryHelper {
|
||||
private final NotesMigration migration;
|
||||
private final Metrics metrics;
|
||||
private final BatchUpdate.Factory updateFactory;
|
||||
private final Duration defaultTimeout;
|
||||
private final Map<ActionType, Duration> defaultTimeouts;
|
||||
private final WaitStrategy waitStrategy;
|
||||
@Nullable private final Consumer<RetryerBuilder<?>> overwriteDefaultRetryerStrategySetup;
|
||||
|
||||
@@ -164,20 +167,35 @@ public class RetryHelper {
|
||||
this.migration = migration;
|
||||
this.updateFactory =
|
||||
new BatchUpdate.Factory(migration, reviewDbBatchUpdateFactory, noteDbBatchUpdateFactory);
|
||||
this.defaultTimeout =
|
||||
|
||||
Duration defaultTimeout =
|
||||
Duration.ofMillis(
|
||||
cfg.getTimeUnit("noteDb", null, "retryTimeout", SECONDS.toMillis(20), MILLISECONDS));
|
||||
cfg.getTimeUnit("retry", null, "timeout", SECONDS.toMillis(20), MILLISECONDS));
|
||||
this.defaultTimeouts = Maps.newEnumMap(ActionType.class);
|
||||
Arrays.stream(ActionType.values())
|
||||
.forEach(
|
||||
at ->
|
||||
defaultTimeouts.put(
|
||||
at,
|
||||
Duration.ofMillis(
|
||||
cfg.getTimeUnit(
|
||||
"retry",
|
||||
at.name(),
|
||||
"timeout",
|
||||
SECONDS.toMillis(defaultTimeout.getSeconds()),
|
||||
MILLISECONDS))));
|
||||
|
||||
this.waitStrategy =
|
||||
WaitStrategies.join(
|
||||
WaitStrategies.exponentialWait(
|
||||
cfg.getTimeUnit("noteDb", null, "retryMaxWait", SECONDS.toMillis(5), MILLISECONDS),
|
||||
cfg.getTimeUnit("retry", null, "maxWait", SECONDS.toMillis(5), MILLISECONDS),
|
||||
MILLISECONDS),
|
||||
WaitStrategies.randomWait(50, MILLISECONDS));
|
||||
this.overwriteDefaultRetryerStrategySetup = overwriteDefaultRetryerStrategySetup;
|
||||
}
|
||||
|
||||
public Duration getDefaultTimeout() {
|
||||
return defaultTimeout;
|
||||
public Duration getDefaultTimeout(ActionType actionType) {
|
||||
return defaultTimeouts.get(actionType);
|
||||
}
|
||||
|
||||
public <T> T execute(
|
||||
@@ -255,7 +273,7 @@ public class RetryHelper {
|
||||
throws Throwable {
|
||||
MetricListener listener = new MetricListener();
|
||||
try {
|
||||
RetryerBuilder<T> retryerBuilder = createRetryerBuilder(opts, exceptionPredicate);
|
||||
RetryerBuilder<T> retryerBuilder = createRetryerBuilder(actionType, opts, exceptionPredicate);
|
||||
retryerBuilder.withRetryListener(listener);
|
||||
return executeWithTimeoutCount(actionType, action, retryerBuilder.build());
|
||||
} finally {
|
||||
@@ -289,7 +307,7 @@ public class RetryHelper {
|
||||
}
|
||||
|
||||
private <O> RetryerBuilder<O> createRetryerBuilder(
|
||||
Options opts, Predicate<Throwable> exceptionPredicate) {
|
||||
ActionType actionType, Options opts, Predicate<Throwable> exceptionPredicate) {
|
||||
RetryerBuilder<O> retryerBuilder =
|
||||
RetryerBuilder.<O>newBuilder().retryIfException(exceptionPredicate);
|
||||
if (opts.listener() != null) {
|
||||
@@ -304,7 +322,8 @@ public class RetryHelper {
|
||||
return retryerBuilder
|
||||
.withStopStrategy(
|
||||
StopStrategies.stopAfterDelay(
|
||||
firstNonNull(opts.timeout(), defaultTimeout).toMillis(), MILLISECONDS))
|
||||
firstNonNull(opts.timeout(), getDefaultTimeout(actionType)).toMillis(),
|
||||
MILLISECONDS))
|
||||
.withWaitStrategy(waitStrategy);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user