Merge "Automatically retry REST operations on failure with tracing enabled"
This commit is contained in:
@@ -3790,6 +3790,14 @@ attempt fails. `<operationType>` can be `ACCOUNT_UPDATE`, `CHANGE_UPDATE`,
|
||||
Defaults to link:#retry.timeout[`retry.timeout`]; unit suffixes are supported,
|
||||
and assumes milliseconds if not specified.
|
||||
|
||||
[[retry.retryWithTraceOnFailure]]retry.retryWithTraceOnFailure::
|
||||
+
|
||||
Whether Gerrit should automatically retry operations on failure with tracing
|
||||
enabled. The automatically generated traces can help with debugging. Please
|
||||
note that only some of the REST endpoints support automatic retry.
|
||||
+
|
||||
By default this is set to false.
|
||||
|
||||
[[rules]]
|
||||
=== Section rules
|
||||
|
||||
|
||||
@@ -511,6 +511,8 @@ public class MergeOp implements AutoCloseable {
|
||||
retryHelper
|
||||
.getDefaultTimeout(ActionType.CHANGE_UPDATE)
|
||||
.multipliedBy(cs.projects().size()))
|
||||
.caller(getClass())
|
||||
.retryWithTrace(t -> !(t instanceof RestApiException))
|
||||
.build());
|
||||
|
||||
if (projects > 1) {
|
||||
|
||||
@@ -40,11 +40,14 @@ import com.google.gerrit.metrics.Field;
|
||||
import com.google.gerrit.metrics.MetricMaker;
|
||||
import com.google.gerrit.server.config.GerritServerConfig;
|
||||
import com.google.gerrit.server.logging.Metadata;
|
||||
import com.google.gerrit.server.logging.RequestId;
|
||||
import com.google.gerrit.server.logging.TraceContext;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import java.time.Duration;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Predicate;
|
||||
@@ -94,12 +97,20 @@ public class RetryHelper {
|
||||
@Nullable
|
||||
abstract Duration timeout();
|
||||
|
||||
abstract Optional<Class<?>> caller();
|
||||
|
||||
abstract Optional<Predicate<Throwable>> retryWithTrace();
|
||||
|
||||
@AutoValue.Builder
|
||||
public abstract static class Builder {
|
||||
public abstract Builder listener(RetryListener listener);
|
||||
|
||||
public abstract Builder timeout(Duration timeout);
|
||||
|
||||
public abstract Builder caller(Class<?> caller);
|
||||
|
||||
public abstract Builder retryWithTrace(Predicate<Throwable> exceptionPredicate);
|
||||
|
||||
public abstract Options build();
|
||||
}
|
||||
}
|
||||
@@ -147,6 +158,7 @@ public class RetryHelper {
|
||||
private final Map<ActionType, Duration> defaultTimeouts;
|
||||
private final WaitStrategy waitStrategy;
|
||||
@Nullable private final Consumer<RetryerBuilder<?>> overwriteDefaultRetryerStrategySetup;
|
||||
private final boolean retryWithTraceOnFailure;
|
||||
|
||||
@Inject
|
||||
RetryHelper(@GerritServerConfig Config cfg, Metrics metrics, BatchUpdate.Factory updateFactory) {
|
||||
@@ -186,6 +198,7 @@ public class RetryHelper {
|
||||
MILLISECONDS),
|
||||
WaitStrategies.randomWait(50, MILLISECONDS));
|
||||
this.overwriteDefaultRetryerStrategySetup = overwriteDefaultRetryerStrategySetup;
|
||||
this.retryWithTraceOnFailure = cfg.getBoolean("retry", "retryWithTraceOnFailure", false);
|
||||
}
|
||||
|
||||
public Duration getDefaultTimeout(ActionType actionType) {
|
||||
@@ -256,8 +269,35 @@ public class RetryHelper {
|
||||
Predicate<Throwable> exceptionPredicate)
|
||||
throws Throwable {
|
||||
MetricListener listener = new MetricListener();
|
||||
try {
|
||||
RetryerBuilder<T> retryerBuilder = createRetryerBuilder(actionType, opts, exceptionPredicate);
|
||||
try (TraceContext traceContext = TraceContext.open()) {
|
||||
RetryerBuilder<T> retryerBuilder =
|
||||
createRetryerBuilder(
|
||||
actionType,
|
||||
opts,
|
||||
t -> {
|
||||
// exceptionPredicate checks for temporary errors for which the operation should be
|
||||
// retried (e.g. LockFailure). The retry has good chances to succeed.
|
||||
if (exceptionPredicate.test(t)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// A non-recoverable failure occurred. Check if we should retry to capture a trace
|
||||
// of the failure. If a trace was already done there is no need to retry.
|
||||
if (retryWithTraceOnFailure
|
||||
&& opts.retryWithTrace().isPresent()
|
||||
&& opts.retryWithTrace().get().test(t)
|
||||
&& !traceContext.isTracing()) {
|
||||
traceContext
|
||||
.addTag(RequestId.Type.TRACE_ID, "retry-on-failure-" + new RequestId())
|
||||
.forceLogging();
|
||||
logger.atFine().withCause(t).log(
|
||||
"%s failed, retry with tracing eanbled",
|
||||
opts.caller().map(Class::getSimpleName).orElse("N/A"));
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
retryerBuilder.withRetryListener(listener);
|
||||
return executeWithTimeoutCount(actionType, action, retryerBuilder.build());
|
||||
} finally {
|
||||
|
||||
@@ -17,6 +17,7 @@ package com.google.gerrit.server.update;
|
||||
import com.google.gerrit.extensions.restapi.AuthException;
|
||||
import com.google.gerrit.extensions.restapi.BadRequestException;
|
||||
import com.google.gerrit.extensions.restapi.ResourceConflictException;
|
||||
import com.google.gerrit.extensions.restapi.RestApiException;
|
||||
import com.google.gerrit.extensions.restapi.RestCollectionModifyView;
|
||||
import com.google.gerrit.extensions.restapi.RestResource;
|
||||
|
||||
@@ -32,7 +33,13 @@ public abstract class RetryingRestCollectionModifyView<
|
||||
@Override
|
||||
public final O apply(P parentResource, I input)
|
||||
throws AuthException, BadRequestException, ResourceConflictException, Exception {
|
||||
return retryHelper.execute((updateFactory) -> applyImpl(updateFactory, parentResource, input));
|
||||
RetryHelper.Options retryOptions =
|
||||
RetryHelper.options()
|
||||
.caller(getClass())
|
||||
.retryWithTrace(t -> !(t instanceof RestApiException))
|
||||
.build();
|
||||
return retryHelper.execute(
|
||||
(updateFactory) -> applyImpl(updateFactory, parentResource, input), retryOptions);
|
||||
}
|
||||
|
||||
protected abstract O applyImpl(BatchUpdate.Factory updateFactory, P parentResource, I input)
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
package com.google.gerrit.server.update;
|
||||
|
||||
import com.google.gerrit.extensions.restapi.RestApiException;
|
||||
import com.google.gerrit.extensions.restapi.RestModifyView;
|
||||
import com.google.gerrit.extensions.restapi.RestResource;
|
||||
|
||||
@@ -27,7 +28,13 @@ public abstract class RetryingRestModifyView<R extends RestResource, I, O>
|
||||
|
||||
@Override
|
||||
public final O apply(R resource, I input) throws Exception {
|
||||
return retryHelper.execute((updateFactory) -> applyImpl(updateFactory, resource, input));
|
||||
RetryHelper.Options retryOptions =
|
||||
RetryHelper.options()
|
||||
.caller(getClass())
|
||||
.retryWithTrace(t -> !(t instanceof RestApiException))
|
||||
.build();
|
||||
return retryHelper.execute(
|
||||
(updateFactory) -> applyImpl(updateFactory, resource, input), retryOptions);
|
||||
}
|
||||
|
||||
protected abstract O applyImpl(BatchUpdate.Factory updateFactory, R resource, I input)
|
||||
|
||||
@@ -16,6 +16,7 @@ package com.google.gerrit.acceptance.rest;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static org.apache.http.HttpStatus.SC_CREATED;
|
||||
import static org.apache.http.HttpStatus.SC_INTERNAL_SERVER_ERROR;
|
||||
import static org.apache.http.HttpStatus.SC_OK;
|
||||
|
||||
import com.google.auto.value.AutoValue;
|
||||
@@ -28,6 +29,7 @@ import com.google.gerrit.acceptance.AbstractDaemonTest;
|
||||
import com.google.gerrit.acceptance.GerritConfig;
|
||||
import com.google.gerrit.acceptance.PushOneCommit;
|
||||
import com.google.gerrit.acceptance.RestResponse;
|
||||
import com.google.gerrit.common.data.SubmitRecord;
|
||||
import com.google.gerrit.extensions.api.changes.ReviewInput;
|
||||
import com.google.gerrit.extensions.events.ChangeIndexedListener;
|
||||
import com.google.gerrit.extensions.registration.DynamicSet;
|
||||
@@ -44,10 +46,14 @@ import com.google.gerrit.server.logging.Metadata;
|
||||
import com.google.gerrit.server.logging.PerformanceLogger;
|
||||
import com.google.gerrit.server.logging.TraceContext;
|
||||
import com.google.gerrit.server.project.CreateProjectArgs;
|
||||
import com.google.gerrit.server.project.SubmitRuleOptions;
|
||||
import com.google.gerrit.server.query.change.ChangeData;
|
||||
import com.google.gerrit.server.rules.SubmitRule;
|
||||
import com.google.gerrit.server.validators.ProjectCreationValidationListener;
|
||||
import com.google.gerrit.server.validators.ValidationException;
|
||||
import com.google.inject.Inject;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
import java.util.SortedSet;
|
||||
@@ -79,6 +85,7 @@ public class TraceIT extends AbstractDaemonTest {
|
||||
@Inject private DynamicSet<CommitValidationListener> commitValidationListeners;
|
||||
@Inject private DynamicSet<ChangeIndexedListener> changeIndexedListeners;
|
||||
@Inject private DynamicSet<PerformanceLogger> performanceLoggers;
|
||||
@Inject private DynamicSet<SubmitRule> submitRules;
|
||||
@Inject private WorkQueue workQueue;
|
||||
|
||||
private TraceValidatingProjectCreationValidationListener projectCreationListener;
|
||||
@@ -579,6 +586,44 @@ public class TraceIT extends AbstractDaemonTest {
|
||||
assertThat(projectCreationListener.tags.get("project")).containsExactly("new24");
|
||||
}
|
||||
|
||||
@GerritConfig(name = "retry.retryWithTraceOnFailure", value = "true")
|
||||
public void autoRetryWithTrace() throws Exception {
|
||||
String changeId = createChange().getChangeId();
|
||||
approve(changeId);
|
||||
|
||||
TraceSubmitRule traceSubmitRule = new TraceSubmitRule();
|
||||
traceSubmitRule.failOnce = true;
|
||||
RegistrationHandle submitRuleRegistrationHandle = submitRules.add("gerrit", traceSubmitRule);
|
||||
try {
|
||||
RestResponse response = adminRestSession.post("/changes/" + changeId + "/submit");
|
||||
assertThat(response.getStatusCode()).isEqualTo(SC_OK);
|
||||
assertThat(response.getHeader(RestApiServlet.X_GERRIT_TRACE)).isNull();
|
||||
assertThat(traceSubmitRule.traceId).startsWith("retry-on-failure-");
|
||||
assertThat(traceSubmitRule.isLoggingForced).isTrue();
|
||||
} finally {
|
||||
submitRuleRegistrationHandle.remove();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void noAutoRetryWithTraceIfDisabled() throws Exception {
|
||||
String changeId = createChange().getChangeId();
|
||||
approve(changeId);
|
||||
|
||||
TraceSubmitRule traceSubmitRule = new TraceSubmitRule();
|
||||
traceSubmitRule.failOnce = true;
|
||||
RegistrationHandle submitRuleRegistrationHandle = submitRules.add("gerrit", traceSubmitRule);
|
||||
try {
|
||||
RestResponse response = adminRestSession.post("/changes/" + changeId + "/submit");
|
||||
assertThat(response.getStatusCode()).isEqualTo(SC_INTERNAL_SERVER_ERROR);
|
||||
assertThat(response.getHeader(RestApiServlet.X_GERRIT_TRACE)).isNull();
|
||||
assertThat(traceSubmitRule.traceId).isNull();
|
||||
assertThat(traceSubmitRule.isLoggingForced).isNull();
|
||||
} finally {
|
||||
submitRuleRegistrationHandle.remove();
|
||||
}
|
||||
}
|
||||
|
||||
private void assertForceLogging(boolean expected) {
|
||||
assertThat(LoggingContext.getInstance().shouldForceLogging(null, null, false))
|
||||
.isEqualTo(expected);
|
||||
@@ -629,6 +674,28 @@ public class TraceIT extends AbstractDaemonTest {
|
||||
public void onChangeDeleted(int id) {}
|
||||
}
|
||||
|
||||
private static class TraceSubmitRule implements SubmitRule {
|
||||
String traceId;
|
||||
Boolean isLoggingForced;
|
||||
boolean failOnce;
|
||||
|
||||
@Override
|
||||
public Collection<SubmitRecord> evaluate(ChangeData changeData, SubmitRuleOptions options) {
|
||||
if (failOnce) {
|
||||
failOnce = false;
|
||||
throw new IllegalStateException("forced failure from test");
|
||||
}
|
||||
|
||||
this.traceId =
|
||||
Iterables.getFirst(LoggingContext.getInstance().getTagsAsMap().get("TRACE_ID"), null);
|
||||
this.isLoggingForced = LoggingContext.getInstance().shouldForceLogging(null, null, false);
|
||||
|
||||
SubmitRecord submitRecord = new SubmitRecord();
|
||||
submitRecord.status = SubmitRecord.Status.OK;
|
||||
return ImmutableList.of(submitRecord);
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestPerformanceLogger implements PerformanceLogger {
|
||||
private List<PerformanceLogEntry> logEntries = new ArrayList<>();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user