Merge changes I8c164877,Idfd86ba8
* changes: ReceiveCommits: Retry scanning for changes to auto-close RetryHelper: Rename and parameterize the metrics
This commit is contained in:
@@ -13,6 +13,13 @@ The following metrics are reported.
|
||||
* `build/label`: Version of Gerrit server software.
|
||||
* `events`: Triggered events.
|
||||
|
||||
=== Actions
|
||||
|
||||
* `action/retry_attempt_counts`: Distribution of number of attempts made
|
||||
by RetryHelper to execute an action (1 == single attempt, no retry)
|
||||
* `action/retry_timeout_count`: Number of action executions of RetryHelper
|
||||
that ultimately timed out
|
||||
|
||||
=== Process
|
||||
|
||||
* `proc/birth_timestamp`: Time at which the Gerrit process started.
|
||||
@@ -87,10 +94,6 @@ topic submissions that concluded successfully.
|
||||
|
||||
* `batch_update/execute_change_ops`: BatchUpdate change update latency,
|
||||
excluding reindexing
|
||||
* `batch_update/retry_attempt_counts`: Distribution of number of attempts made
|
||||
by RetryHelper (1 == single attempt, no retry)
|
||||
* `batch_update/retry_timeout_count`: Number of executions of RetryHelper that
|
||||
ultimately timed out
|
||||
|
||||
=== NoteDb
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ import com.google.gerrit.server.index.change.ReindexAfterRefUpdate;
|
||||
import com.google.gerrit.server.mail.send.OutgoingEmailValidator;
|
||||
import com.google.gerrit.server.update.RefUpdateUtil;
|
||||
import com.google.gerrit.server.update.RetryHelper;
|
||||
import com.google.gerrit.server.update.RetryHelper.ActionType;
|
||||
import com.google.gwtorm.server.OrmDuplicateKeyException;
|
||||
import com.google.gwtorm.server.OrmException;
|
||||
import com.google.inject.Inject;
|
||||
@@ -473,6 +474,7 @@ public class AccountsUpdate {
|
||||
private Account deleteAccount(Account.Id accountId)
|
||||
throws IOException, OrmException, ConfigInvalidException {
|
||||
return retryHelper.execute(
|
||||
ActionType.ACCOUNT_UPDATE,
|
||||
() -> {
|
||||
deleteUserBranch(accountId);
|
||||
return null;
|
||||
@@ -525,6 +527,7 @@ public class AccountsUpdate {
|
||||
private Account updateAccount(AccountUpdate accountUpdate)
|
||||
throws IOException, ConfigInvalidException, OrmException {
|
||||
return retryHelper.execute(
|
||||
ActionType.ACCOUNT_UPDATE,
|
||||
() -> {
|
||||
try (Repository allUsersRepo = repoManager.openRepository(allUsersName)) {
|
||||
UpdatedAccount updatedAccount = accountUpdate.update(allUsersRepo);
|
||||
|
||||
@@ -28,6 +28,7 @@ import com.google.gerrit.server.config.AllUsersName;
|
||||
import com.google.gerrit.server.git.GitRepositoryManager;
|
||||
import com.google.gerrit.server.git.MetaDataUpdate;
|
||||
import com.google.gerrit.server.update.RetryHelper;
|
||||
import com.google.gerrit.server.update.RetryHelper.ActionType;
|
||||
import com.google.gwtorm.server.OrmDuplicateKeyException;
|
||||
import com.google.gwtorm.server.OrmException;
|
||||
import com.google.inject.Inject;
|
||||
@@ -427,6 +428,7 @@ public class ExternalIdsUpdate {
|
||||
private void updateNoteMap(ExternalIdUpdater updater)
|
||||
throws IOException, ConfigInvalidException, OrmException {
|
||||
retryHelper.execute(
|
||||
ActionType.ACCOUNT_UPDATE,
|
||||
() -> {
|
||||
try (Repository repo = repoManager.openRepository(allUsersName)) {
|
||||
ExternalIdNotes extIdNotes =
|
||||
|
||||
@@ -148,6 +148,7 @@ import com.google.gerrit.server.update.Context;
|
||||
import com.google.gerrit.server.update.RepoContext;
|
||||
import com.google.gerrit.server.update.RepoOnlyOp;
|
||||
import com.google.gerrit.server.update.RetryHelper;
|
||||
import com.google.gerrit.server.update.RetryHelper.ActionType;
|
||||
import com.google.gerrit.server.update.UpdateException;
|
||||
import com.google.gerrit.server.util.LabelVote;
|
||||
import com.google.gerrit.server.util.MagicBranch;
|
||||
@@ -2868,7 +2869,11 @@ class ReceiveCommits {
|
||||
|
||||
for (Ref ref : byCommit.get(c.copy())) {
|
||||
PatchSet.Id psId = PatchSet.Id.fromRef(ref.getName());
|
||||
Optional<ChangeData> cd = byLegacyId(psId.getParentKey());
|
||||
Optional<ChangeData> cd =
|
||||
retryHelper.execute(
|
||||
ActionType.CHANGE_QUERY,
|
||||
() -> byLegacyId(psId.getParentKey()),
|
||||
t -> t instanceof OrmException);
|
||||
if (cd.isPresent() && cd.get().change().getDest().equals(branch)) {
|
||||
existingPatchSets++;
|
||||
bu.addOp(
|
||||
@@ -2880,7 +2885,11 @@ class ReceiveCommits {
|
||||
|
||||
for (String changeId : c.getFooterLines(CHANGE_ID)) {
|
||||
if (byKey == null) {
|
||||
byKey = openChangesByKeyByBranch(branch);
|
||||
byKey =
|
||||
retryHelper.execute(
|
||||
ActionType.CHANGE_QUERY,
|
||||
() -> openChangesByKeyByBranch(branch),
|
||||
t -> t instanceof OrmException);
|
||||
}
|
||||
|
||||
ChangeNotes onto = byKey.get(new Change.Key(changeId.trim()));
|
||||
@@ -2926,7 +2935,10 @@ class ReceiveCommits {
|
||||
logError("Failed to auto-close changes", e);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
},
|
||||
// Use a multiple of the default timeout to account for inner retries that may otherwise
|
||||
// eat up the whole timeout so that no time is left to retry this outer action.
|
||||
RetryHelper.options().timeout(retryHelper.getDefaultTimeout().multipliedBy(5)).build());
|
||||
} catch (RestApiException e) {
|
||||
logError("Can't insert patchset", e);
|
||||
} catch (UpdateException e) {
|
||||
|
||||
@@ -32,9 +32,10 @@ import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.gerrit.common.Nullable;
|
||||
import com.google.gerrit.extensions.restapi.RestApiException;
|
||||
import com.google.gerrit.metrics.Counter0;
|
||||
import com.google.gerrit.metrics.Counter1;
|
||||
import com.google.gerrit.metrics.Description;
|
||||
import com.google.gerrit.metrics.Histogram0;
|
||||
import com.google.gerrit.metrics.Field;
|
||||
import com.google.gerrit.metrics.Histogram1;
|
||||
import com.google.gerrit.metrics.MetricMaker;
|
||||
import com.google.gerrit.server.config.GerritServerConfig;
|
||||
import com.google.gerrit.server.git.LockFailureException;
|
||||
@@ -61,6 +62,12 @@ public class RetryHelper {
|
||||
T call() throws Exception;
|
||||
}
|
||||
|
||||
public enum ActionType {
|
||||
ACCOUNT_UPDATE,
|
||||
CHANGE_QUERY,
|
||||
CHANGE_UPDATE
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for retrying a single operation.
|
||||
*
|
||||
@@ -96,25 +103,29 @@ public class RetryHelper {
|
||||
@VisibleForTesting
|
||||
@Singleton
|
||||
public static class Metrics {
|
||||
final Histogram0 attemptCounts;
|
||||
final Counter0 timeoutCount;
|
||||
final Histogram1<ActionType> attemptCounts;
|
||||
final Counter1<ActionType> timeoutCount;
|
||||
|
||||
@Inject
|
||||
Metrics(MetricMaker metricMaker) {
|
||||
Field<ActionType> view = Field.ofEnum(ActionType.class, "action_type");
|
||||
attemptCounts =
|
||||
metricMaker.newHistogram(
|
||||
"batch_update/retry_attempt_counts",
|
||||
"action/retry_attempt_counts",
|
||||
new Description(
|
||||
"Distribution of number of attempts made by RetryHelper"
|
||||
"Distribution of number of attempts made by RetryHelper to execute an action"
|
||||
+ " (1 == single attempt, no retry)")
|
||||
.setCumulative()
|
||||
.setUnit("attempts"));
|
||||
.setUnit("attempts"),
|
||||
view);
|
||||
timeoutCount =
|
||||
metricMaker.newCounter(
|
||||
"batch_update/retry_timeout_count",
|
||||
new Description("Number of executions of RetryHelper that ultimately timed out")
|
||||
"action/retry_timeout_count",
|
||||
new Description(
|
||||
"Number of action executions of RetryHelper that ultimately timed out")
|
||||
.setCumulative()
|
||||
.setUnit("timeouts"));
|
||||
.setUnit("timeouts"),
|
||||
view);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,9 +182,16 @@ public class RetryHelper {
|
||||
return defaultTimeout;
|
||||
}
|
||||
|
||||
public <T> T execute(Action<T> action) throws IOException, ConfigInvalidException, OrmException {
|
||||
public <T> T execute(ActionType actionType, Action<T> action)
|
||||
throws IOException, ConfigInvalidException, OrmException {
|
||||
return execute(actionType, action, t -> t instanceof LockFailureException);
|
||||
}
|
||||
|
||||
public <T> T execute(
|
||||
ActionType actionType, Action<T> action, Predicate<Throwable> exceptionPredicate)
|
||||
throws IOException, ConfigInvalidException, OrmException {
|
||||
try {
|
||||
return execute(action, defaults(), t -> t instanceof LockFailureException);
|
||||
return execute(actionType, action, defaults(), exceptionPredicate);
|
||||
} catch (Throwable t) {
|
||||
Throwables.throwIfUnchecked(t);
|
||||
Throwables.throwIfInstanceOf(t, IOException.class);
|
||||
@@ -195,10 +213,13 @@ public class RetryHelper {
|
||||
// transactions. Either way, retrying a partially-failed operation is not idempotent, so
|
||||
// don't do it automatically. Let the end user decide whether they want to retry.
|
||||
return execute(
|
||||
() -> changeAction.call(updateFactory), RetryerBuilder.<T>newBuilder().build());
|
||||
ActionType.CHANGE_UPDATE,
|
||||
() -> changeAction.call(updateFactory),
|
||||
RetryerBuilder.<T>newBuilder().build());
|
||||
}
|
||||
|
||||
return execute(
|
||||
ActionType.CHANGE_UPDATE,
|
||||
() -> changeAction.call(updateFactory),
|
||||
opts,
|
||||
t -> {
|
||||
@@ -218,6 +239,7 @@ public class RetryHelper {
|
||||
/**
|
||||
* Executes an action with a given retryer.
|
||||
*
|
||||
* @param actionType the type of the action
|
||||
* @param action the action which should be executed and retried on failure
|
||||
* @param opts options for retrying the action on failure
|
||||
* @param exceptionPredicate predicate to control on which exception the action should be retried
|
||||
@@ -225,33 +247,39 @@ public class RetryHelper {
|
||||
* @throws Throwable any error or exception that made the action fail, callers are expected to
|
||||
* catch and inspect this Throwable to decide carefully whether it should be re-thrown
|
||||
*/
|
||||
private <T> T execute(Action<T> action, Options opts, Predicate<Throwable> exceptionPredicate)
|
||||
private <T> T execute(
|
||||
ActionType actionType,
|
||||
Action<T> action,
|
||||
Options opts,
|
||||
Predicate<Throwable> exceptionPredicate)
|
||||
throws Throwable {
|
||||
MetricListener listener = new MetricListener();
|
||||
try {
|
||||
RetryerBuilder<T> retryerBuilder = createRetryerBuilder(opts, exceptionPredicate);
|
||||
retryerBuilder.withRetryListener(listener);
|
||||
return execute(action, retryerBuilder.build());
|
||||
return execute(actionType, action, retryerBuilder.build());
|
||||
} finally {
|
||||
metrics.attemptCounts.record(listener.getAttemptCount());
|
||||
metrics.attemptCounts.record(actionType, listener.getAttemptCount());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes an action with a given retryer.
|
||||
*
|
||||
* @param actionType the type of the action
|
||||
* @param action the action which should be executed and retried on failure
|
||||
* @param retryer the retryer
|
||||
* @return the result of executing the action
|
||||
* @throws Throwable any error or exception that made the action fail, callers are expected to
|
||||
* catch and inspect this Throwable to decide carefully whether it should be re-thrown
|
||||
*/
|
||||
private <T> T execute(Action<T> action, Retryer<T> retryer) throws Throwable {
|
||||
private <T> T execute(ActionType actionType, Action<T> action, Retryer<T> retryer)
|
||||
throws Throwable {
|
||||
try {
|
||||
return retryer.call(() -> action.call());
|
||||
} catch (ExecutionException | RetryException e) {
|
||||
if (e instanceof RetryException) {
|
||||
metrics.timeoutCount.increment();
|
||||
metrics.timeoutCount.increment(actionType);
|
||||
}
|
||||
if (e.getCause() != null) {
|
||||
throw e.getCause();
|
||||
|
||||
Reference in New Issue
Block a user