From 2b87d7f8624caa14b4aacbd6e3954809e32a463c Mon Sep 17 00:00:00 2001 From: Dave Borowitz Date: Tue, 16 May 2017 18:48:41 -0400 Subject: [PATCH] RetryHelper: Make time limits configurable Use an arbitrarily high limit during NoteDbOnlyIT, where we explicitly want retries to happen, in order to avoid spurious timeouts caused by scheduling delays on overloaded test machines. For other tests, LockFailureExceptions generally shouldn't crop up during updates anyway, and if they do, we would rather the updates time out in finite amount of time so we can see the resulting stack trace. Change-Id: I5bad32749b994e6473176cdd45cc80d9d4e195eb --- Documentation/dev-note-db.txt | 31 +++++++++++++++++++ .../server/notedb/NoteDbOnlyIT.java | 10 ++++++ .../gerrit/server/update/RetryHelper.java | 29 ++++++++++++----- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/Documentation/dev-note-db.txt b/Documentation/dev-note-db.txt index e84fecfea1..e5bd54eab0 100644 --- a/Documentation/dev-note-db.txt +++ b/Documentation/dev-note-db.txt @@ -146,3 +146,34 @@ Here, the migration process looks like: - Run a Flume to migrate all existing changes to NoteDb primary. (Also closed-source, but basically just a wrapper around `PrimaryStorageMigrator`.) - Turn off access to ReviewDb changes tables. + +== Configuration + +This section describes general configuration for the NoteDb backend that is not +directly related to the migration process. These options will continue to be +supported after the migration is complete and the migration-related options are +obsolete. + +[[noteDb.retryMaxWait]]noteDb.retryMaxWait:: ++ +Maximum time to wait between attempts to retry update operations when one +attempt fails due to contention (aka lock failure) on the underlying ref +storage. Operations are retried with exponential backoff, plus some random +jitter, until the interval reaches this limit. After that, retries continue to +occur after a fixed timeout (plus jitter), up to +link:#noteDb.retryTimeout[`noteDb.retryTimeout`]. ++ +Only applicable when `noteDb.changes.fuseUpdates=true`. ++ +Defaults to 5 seconds; unit suffixes are supported, and assumes milliseconds if +not specified. + +[[noteDb.retryTimeout]]noteDb.retryTimeout:: ++ +Total timeout for retrying update operations when one attempt fails due to +contention (aka lock failure) on the underlying ref storage. ++ +Only applicable when `noteDb.changes.fuseUpdates=true`. ++ +Defaults to 20 seconds; unit suffixes are supported, and assumes milliseconds if +not specified. diff --git a/gerrit-acceptance-tests/src/test/java/com/google/gerrit/acceptance/server/notedb/NoteDbOnlyIT.java b/gerrit-acceptance-tests/src/test/java/com/google/gerrit/acceptance/server/notedb/NoteDbOnlyIT.java index 2c61e641a5..9859086b0b 100644 --- a/gerrit-acceptance-tests/src/test/java/com/google/gerrit/acceptance/server/notedb/NoteDbOnlyIT.java +++ b/gerrit-acceptance-tests/src/test/java/com/google/gerrit/acceptance/server/notedb/NoteDbOnlyIT.java @@ -36,6 +36,7 @@ import com.google.gerrit.server.update.BatchUpdateOp; import com.google.gerrit.server.update.ChangeContext; import com.google.gerrit.server.update.RepoContext; import com.google.gerrit.server.update.RetryHelper; +import com.google.gerrit.testutil.ConfigSuite; import com.google.inject.Inject; import java.io.IOException; import java.util.Collections; @@ -45,6 +46,7 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; import org.eclipse.jgit.internal.storage.dfs.InMemoryRepository; import org.eclipse.jgit.lib.CommitBuilder; +import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectInserter; @@ -58,6 +60,14 @@ import org.junit.Before; import org.junit.Test; public class NoteDbOnlyIT extends AbstractDaemonTest { + @ConfigSuite.Default + public static Config defaultConfig() { + Config cfg = new Config(); + // Avoid spurious timeouts during intentional retries due to overloaded test machines. + cfg.setString("noteDb", null, "retryTimeout", Integer.MAX_VALUE + "s"); + return cfg; + } + @Inject private RetryHelper retryHelper; @Before diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/update/RetryHelper.java b/gerrit-server/src/main/java/com/google/gerrit/server/update/RetryHelper.java index 0c2e0da0a5..403b0a4dce 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/update/RetryHelper.java +++ b/gerrit-server/src/main/java/com/google/gerrit/server/update/RetryHelper.java @@ -14,18 +14,24 @@ package com.google.gerrit.server.update; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; + import com.github.rholder.retry.RetryException; import com.github.rholder.retry.RetryerBuilder; import com.github.rholder.retry.StopStrategies; +import com.github.rholder.retry.StopStrategy; import com.github.rholder.retry.WaitStrategies; +import com.github.rholder.retry.WaitStrategy; import com.google.common.base.Throwables; import com.google.gerrit.extensions.restapi.RestApiException; +import com.google.gerrit.server.config.GerritServerConfig; import com.google.gerrit.server.git.LockFailureException; import com.google.gerrit.server.notedb.NotesMigration; import com.google.inject.Inject; import com.google.inject.Singleton; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; +import org.eclipse.jgit.lib.Config; @Singleton public class RetryHelper { @@ -35,9 +41,12 @@ public class RetryHelper { private final NotesMigration migration; private final BatchUpdate.Factory updateFactory; + private final StopStrategy stopStrategy; + private final WaitStrategy waitStrategy; @Inject RetryHelper( + @GerritServerConfig Config cfg, NotesMigration migration, ReviewDbBatchUpdate.AssistedFactory reviewDbBatchUpdateFactory, FusedNoteDbBatchUpdate.AssistedFactory fusedNoteDbBatchUpdateFactory, @@ -49,19 +58,25 @@ public class RetryHelper { reviewDbBatchUpdateFactory, fusedNoteDbBatchUpdateFactory, unfusedNoteDbBatchUpdateFactory); + this.stopStrategy = + StopStrategies.stopAfterDelay( + cfg.getTimeUnit("noteDb", null, "retryTimeout", SECONDS.toMillis(5), MILLISECONDS), + MILLISECONDS); + this.waitStrategy = + WaitStrategies.join( + WaitStrategies.exponentialWait( + cfg.getTimeUnit("noteDb", null, "retryMaxWait", SECONDS.toMillis(20), MILLISECONDS), + MILLISECONDS), + WaitStrategies.randomWait(50, MILLISECONDS)); } public T execute(Action action) throws RestApiException, UpdateException { try { RetryerBuilder builder = RetryerBuilder.newBuilder(); if (migration.disableChangeReviewDb() && migration.fuseUpdates()) { - // TODO(dborowitz): Make configurable. builder - .withStopStrategy(StopStrategies.stopAfterDelay(20, TimeUnit.SECONDS)) - .withWaitStrategy( - WaitStrategies.join( - WaitStrategies.exponentialWait(5, TimeUnit.SECONDS), - WaitStrategies.randomWait(50, TimeUnit.MILLISECONDS))) + .withStopStrategy(stopStrategy) + .withWaitStrategy(waitStrategy) .retryIfException(RetryHelper::isLockFailure); } else { // Either we aren't full-NoteDb, or the underlying ref storage doesn't support atomic