Add metric to count how often requests are automatically retried

Signed-off-by: Edwin Kempin <ekempin@google.com>
Change-Id: Idd0e11babea2de2813ffbd741f58b3d0a43d7d4a
This commit is contained in:
Edwin Kempin
2019-08-02 13:16:55 +02:00
parent 4313344073
commit ae84332a99
3 changed files with 22 additions and 5 deletions

View File

@@ -19,6 +19,7 @@ The following metrics are reported.
by RetryHelper to execute an action (0 == single attempt, no retry)
* `action/retry_timeout_count`: Number of action executions of RetryHelper
that ultimately timed out
* `action/auto_retry_count`: Number of times a failed action was automatically
retried with tracing enabled, by action type and operation name
=== Pushes

View File

@@ -89,6 +89,9 @@ public abstract class Metadata {
// One or more resources
public abstract Optional<Boolean> multiple();
// The name of an operation that is performed.
public abstract Optional<String> operationName();
// Partial or full computation
public abstract Optional<Boolean> partial();
@@ -185,6 +188,8 @@ public abstract class Metadata {
public abstract Builder multiple(boolean multiple);
public abstract Builder operationName(String operationName);
public abstract Builder partial(boolean partial);
public abstract Builder noteDbFilePath(@Nullable String noteDbFilePath);

View File

@@ -35,6 +35,7 @@ import com.google.gerrit.common.Nullable;
import com.google.gerrit.extensions.restapi.RestApiException;
import com.google.gerrit.git.LockFailureException;
import com.google.gerrit.metrics.Counter1;
import com.google.gerrit.metrics.Counter2;
import com.google.gerrit.metrics.Description;
import com.google.gerrit.metrics.Field;
import com.google.gerrit.metrics.MetricMaker;
@@ -120,6 +121,7 @@ public class RetryHelper {
public static class Metrics {
final Counter1<ActionType> attemptCounts;
final Counter1<ActionType> timeoutCount;
final Counter2<ActionType, String> autoRetryCount;
@Inject
Metrics(MetricMaker metricMaker) {
@@ -142,6 +144,16 @@ public class RetryHelper {
.setCumulative()
.setUnit("timeouts"),
actionTypeField);
autoRetryCount =
metricMaker.newCounter(
"action/auto_retry_count",
new Description("Number of automatic retries with tracing")
.setCumulative()
.setUnit("retries"),
actionTypeField,
Field.ofString("operation_name", Metadata.Builder::operationName)
.description("The name of the operation that was retried.")
.build());
}
}
@@ -286,22 +298,21 @@ public class RetryHelper {
if (retryWithTraceOnFailure
&& opts.retryWithTrace().isPresent()
&& opts.retryWithTrace().get().test(t)) {
String caller = opts.caller().map(Class::getSimpleName).orElse("N/A");
if (!traceContext.isTracing()) {
traceContext
.addTag(RequestId.Type.TRACE_ID, "retry-on-failure-" + new RequestId())
.forceLogging();
logger.atFine().withCause(t).log(
"%s failed, retry with tracing enabled",
opts.caller().map(Class::getSimpleName).orElse("N/A"));
"%s failed, retry with tracing enabled", caller);
metrics.autoRetryCount.increment(actionType, caller);
return true;
}
// A non-recoverable failure occurred. We retried the operation with tracing
// enabled and it failed again. Log the failure so that admin can see if it
// differs from the failure that triggered the retry.
logger.atFine().withCause(t).log(
"auto-retry of %s has failed",
opts.caller().map(Class::getSimpleName).orElse("N/A"));
logger.atFine().withCause(t).log("auto-retry of %s has failed", caller);
return false;
}