Add metric to count how often requests are automatically retried
Signed-off-by: Edwin Kempin <ekempin@google.com>
Change-Id: Idd0e11babea2de2813ffbd741f58b3d0a43d7d4a
This commit is contained in:
		@@ -19,6 +19,7 @@ The following metrics are reported.
 | 
			
		||||
by RetryHelper to execute an action (0 == single attempt, no retry)
 | 
			
		||||
* `action/retry_timeout_count`: Number of action executions of RetryHelper
 | 
			
		||||
that ultimately timed out
 | 
			
		||||
* `action/auto_retry_count`: Number of automatic retries with tracing
 | 
			
		||||
 | 
			
		||||
=== Pushes
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -89,6 +89,9 @@ public abstract class Metadata {
 | 
			
		||||
  // One or more resources
 | 
			
		||||
  public abstract Optional<Boolean> multiple();
 | 
			
		||||
 | 
			
		||||
  // The name of an operation that is performed.
 | 
			
		||||
  public abstract Optional<String> operationName();
 | 
			
		||||
 | 
			
		||||
  // Partial or full computation
 | 
			
		||||
  public abstract Optional<Boolean> partial();
 | 
			
		||||
 | 
			
		||||
@@ -185,6 +188,8 @@ public abstract class Metadata {
 | 
			
		||||
 | 
			
		||||
    public abstract Builder multiple(boolean multiple);
 | 
			
		||||
 | 
			
		||||
    public abstract Builder operationName(String operationName);
 | 
			
		||||
 | 
			
		||||
    public abstract Builder partial(boolean partial);
 | 
			
		||||
 | 
			
		||||
    public abstract Builder noteDbFilePath(@Nullable String noteDbFilePath);
 | 
			
		||||
 
 | 
			
		||||
@@ -35,6 +35,7 @@ import com.google.gerrit.common.Nullable;
 | 
			
		||||
import com.google.gerrit.extensions.restapi.RestApiException;
 | 
			
		||||
import com.google.gerrit.git.LockFailureException;
 | 
			
		||||
import com.google.gerrit.metrics.Counter1;
 | 
			
		||||
import com.google.gerrit.metrics.Counter2;
 | 
			
		||||
import com.google.gerrit.metrics.Description;
 | 
			
		||||
import com.google.gerrit.metrics.Field;
 | 
			
		||||
import com.google.gerrit.metrics.MetricMaker;
 | 
			
		||||
@@ -120,6 +121,7 @@ public class RetryHelper {
 | 
			
		||||
  public static class Metrics {
 | 
			
		||||
    final Counter1<ActionType> attemptCounts;
 | 
			
		||||
    final Counter1<ActionType> timeoutCount;
 | 
			
		||||
    final Counter2<ActionType, String> autoRetryCount;
 | 
			
		||||
 | 
			
		||||
    @Inject
 | 
			
		||||
    Metrics(MetricMaker metricMaker) {
 | 
			
		||||
@@ -142,6 +144,16 @@ public class RetryHelper {
 | 
			
		||||
                  .setCumulative()
 | 
			
		||||
                  .setUnit("timeouts"),
 | 
			
		||||
              actionTypeField);
 | 
			
		||||
      autoRetryCount =
 | 
			
		||||
          metricMaker.newCounter(
 | 
			
		||||
              "action/auto_retry_count",
 | 
			
		||||
              new Description("Number of automatic retries with tracing")
 | 
			
		||||
                  .setCumulative()
 | 
			
		||||
                  .setUnit("retries"),
 | 
			
		||||
              actionTypeField,
 | 
			
		||||
              Field.ofString("operation_name", Metadata.Builder::operationName)
 | 
			
		||||
                  .description("The name of the operation that was retried.")
 | 
			
		||||
                  .build());
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -286,22 +298,21 @@ public class RetryHelper {
 | 
			
		||||
                if (retryWithTraceOnFailure
 | 
			
		||||
                    && opts.retryWithTrace().isPresent()
 | 
			
		||||
                    && opts.retryWithTrace().get().test(t)) {
 | 
			
		||||
                  String caller = opts.caller().map(Class::getSimpleName).orElse("N/A");
 | 
			
		||||
                  if (!traceContext.isTracing()) {
 | 
			
		||||
                    traceContext
 | 
			
		||||
                        .addTag(RequestId.Type.TRACE_ID, "retry-on-failure-" + new RequestId())
 | 
			
		||||
                        .forceLogging();
 | 
			
		||||
                    logger.atFine().withCause(t).log(
 | 
			
		||||
                        "%s failed, retry with tracing enabled",
 | 
			
		||||
                        opts.caller().map(Class::getSimpleName).orElse("N/A"));
 | 
			
		||||
                        "%s failed, retry with tracing enabled", caller);
 | 
			
		||||
                    metrics.autoRetryCount.increment(actionType, caller);
 | 
			
		||||
                    return true;
 | 
			
		||||
                  }
 | 
			
		||||
 | 
			
		||||
                  // A non-recoverable failure occurred. We retried the operation with tracing
 | 
			
		||||
                  // enabled and it failed again. Log the failure so that admins can see if it
 | 
			
		||||
                  // differs from the failure that triggered the retry.
 | 
			
		||||
                  logger.atFine().withCause(t).log(
 | 
			
		||||
                      "auto-retry of %s has failed",
 | 
			
		||||
                      opts.caller().map(Class::getSimpleName).orElse("N/A"));
 | 
			
		||||
                  logger.atFine().withCause(t).log("auto-retry of %s has failed", caller);
 | 
			
		||||
                  return false;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user