Merge changes I8c164877,Idfd86ba8

* changes:
  ReceiveCommits: Retry scanning for changes to auto-close
  RetryHelper: Rename and parameterize the metrics
This commit is contained in:
Dave Borowitz
2018-01-08 14:16:39 +00:00
committed by Gerrit Code Review
5 changed files with 73 additions and 25 deletions

View File

@@ -13,6 +13,13 @@ The following metrics are reported.
* `build/label`: Version of Gerrit server software.
* `events`: Triggered events.
=== Actions
* `action/retry_attempt_counts`: Distribution of number of attempts made
by RetryHelper to execute an action (1 == single attempt, no retry)
* `action/retry_timeout_count`: Number of action executions of RetryHelper
that ultimately timed out
=== Process
* `proc/birth_timestamp`: Time at which the Gerrit process started.
@@ -87,10 +94,6 @@ topic submissions that concluded successfully.
* `batch_update/execute_change_ops`: BatchUpdate change update latency,
excluding reindexing
* `batch_update/retry_attempt_counts`: Distribution of number of attempts made
by RetryHelper (1 == single attempt, no retry)
* `batch_update/retry_timeout_count`: Number of executions of RetryHelper that
ultimately timed out
=== NoteDb

View File

@@ -38,6 +38,7 @@ import com.google.gerrit.server.index.change.ReindexAfterRefUpdate;
import com.google.gerrit.server.mail.send.OutgoingEmailValidator;
import com.google.gerrit.server.update.RefUpdateUtil;
import com.google.gerrit.server.update.RetryHelper;
import com.google.gerrit.server.update.RetryHelper.ActionType;
import com.google.gwtorm.server.OrmDuplicateKeyException;
import com.google.gwtorm.server.OrmException;
import com.google.inject.Inject;
@@ -473,6 +474,7 @@ public class AccountsUpdate {
private Account deleteAccount(Account.Id accountId)
throws IOException, OrmException, ConfigInvalidException {
return retryHelper.execute(
ActionType.ACCOUNT_UPDATE,
() -> {
deleteUserBranch(accountId);
return null;
@@ -525,6 +527,7 @@ public class AccountsUpdate {
private Account updateAccount(AccountUpdate accountUpdate)
throws IOException, ConfigInvalidException, OrmException {
return retryHelper.execute(
ActionType.ACCOUNT_UPDATE,
() -> {
try (Repository allUsersRepo = repoManager.openRepository(allUsersName)) {
UpdatedAccount updatedAccount = accountUpdate.update(allUsersRepo);

View File

@@ -28,6 +28,7 @@ import com.google.gerrit.server.config.AllUsersName;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.MetaDataUpdate;
import com.google.gerrit.server.update.RetryHelper;
import com.google.gerrit.server.update.RetryHelper.ActionType;
import com.google.gwtorm.server.OrmDuplicateKeyException;
import com.google.gwtorm.server.OrmException;
import com.google.inject.Inject;
@@ -427,6 +428,7 @@ public class ExternalIdsUpdate {
private void updateNoteMap(ExternalIdUpdater updater)
throws IOException, ConfigInvalidException, OrmException {
retryHelper.execute(
ActionType.ACCOUNT_UPDATE,
() -> {
try (Repository repo = repoManager.openRepository(allUsersName)) {
ExternalIdNotes extIdNotes =

View File

@@ -148,6 +148,7 @@ import com.google.gerrit.server.update.Context;
import com.google.gerrit.server.update.RepoContext;
import com.google.gerrit.server.update.RepoOnlyOp;
import com.google.gerrit.server.update.RetryHelper;
import com.google.gerrit.server.update.RetryHelper.ActionType;
import com.google.gerrit.server.update.UpdateException;
import com.google.gerrit.server.util.LabelVote;
import com.google.gerrit.server.util.MagicBranch;
@@ -2868,7 +2869,11 @@ class ReceiveCommits {
for (Ref ref : byCommit.get(c.copy())) {
PatchSet.Id psId = PatchSet.Id.fromRef(ref.getName());
Optional<ChangeData> cd = byLegacyId(psId.getParentKey());
Optional<ChangeData> cd =
retryHelper.execute(
ActionType.CHANGE_QUERY,
() -> byLegacyId(psId.getParentKey()),
t -> t instanceof OrmException);
if (cd.isPresent() && cd.get().change().getDest().equals(branch)) {
existingPatchSets++;
bu.addOp(
@@ -2880,7 +2885,11 @@ class ReceiveCommits {
for (String changeId : c.getFooterLines(CHANGE_ID)) {
if (byKey == null) {
byKey = openChangesByKeyByBranch(branch);
byKey =
retryHelper.execute(
ActionType.CHANGE_QUERY,
() -> openChangesByKeyByBranch(branch),
t -> t instanceof OrmException);
}
ChangeNotes onto = byKey.get(new Change.Key(changeId.trim()));
@@ -2926,7 +2935,10 @@ class ReceiveCommits {
logError("Failed to auto-close changes", e);
}
return null;
});
},
// Use a multiple of the default timeout to account for inner retries that may otherwise
// eat up the whole timeout so that no time is left to retry this outer action.
RetryHelper.options().timeout(retryHelper.getDefaultTimeout().multipliedBy(5)).build());
} catch (RestApiException e) {
logError("Can't insert patchset", e);
} catch (UpdateException e) {

View File

@@ -32,9 +32,10 @@ import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.gerrit.common.Nullable;
import com.google.gerrit.extensions.restapi.RestApiException;
import com.google.gerrit.metrics.Counter0;
import com.google.gerrit.metrics.Counter1;
import com.google.gerrit.metrics.Description;
import com.google.gerrit.metrics.Histogram0;
import com.google.gerrit.metrics.Field;
import com.google.gerrit.metrics.Histogram1;
import com.google.gerrit.metrics.MetricMaker;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.LockFailureException;
@@ -61,6 +62,12 @@ public class RetryHelper {
T call() throws Exception;
}
public enum ActionType {
ACCOUNT_UPDATE,
CHANGE_QUERY,
CHANGE_UPDATE
}
/**
* Options for retrying a single operation.
*
@@ -96,25 +103,29 @@ public class RetryHelper {
@VisibleForTesting
@Singleton
public static class Metrics {
final Histogram0 attemptCounts;
final Counter0 timeoutCount;
final Histogram1<ActionType> attemptCounts;
final Counter1<ActionType> timeoutCount;
@Inject
Metrics(MetricMaker metricMaker) {
Field<ActionType> view = Field.ofEnum(ActionType.class, "action_type");
attemptCounts =
metricMaker.newHistogram(
"batch_update/retry_attempt_counts",
"action/retry_attempt_counts",
new Description(
"Distribution of number of attempts made by RetryHelper"
"Distribution of number of attempts made by RetryHelper to execute an action"
+ " (1 == single attempt, no retry)")
.setCumulative()
.setUnit("attempts"));
.setUnit("attempts"),
view);
timeoutCount =
metricMaker.newCounter(
"batch_update/retry_timeout_count",
new Description("Number of executions of RetryHelper that ultimately timed out")
"action/retry_timeout_count",
new Description(
"Number of action executions of RetryHelper that ultimately timed out")
.setCumulative()
.setUnit("timeouts"));
.setUnit("timeouts"),
view);
}
}
@@ -171,9 +182,16 @@ public class RetryHelper {
return defaultTimeout;
}
public <T> T execute(Action<T> action) throws IOException, ConfigInvalidException, OrmException {
public <T> T execute(ActionType actionType, Action<T> action)
throws IOException, ConfigInvalidException, OrmException {
return execute(actionType, action, t -> t instanceof LockFailureException);
}
public <T> T execute(
ActionType actionType, Action<T> action, Predicate<Throwable> exceptionPredicate)
throws IOException, ConfigInvalidException, OrmException {
try {
return execute(action, defaults(), t -> t instanceof LockFailureException);
return execute(actionType, action, defaults(), exceptionPredicate);
} catch (Throwable t) {
Throwables.throwIfUnchecked(t);
Throwables.throwIfInstanceOf(t, IOException.class);
@@ -195,10 +213,13 @@ public class RetryHelper {
// transactions. Either way, retrying a partially-failed operation is not idempotent, so
// don't do it automatically. Let the end user decide whether they want to retry.
return execute(
() -> changeAction.call(updateFactory), RetryerBuilder.<T>newBuilder().build());
ActionType.CHANGE_UPDATE,
() -> changeAction.call(updateFactory),
RetryerBuilder.<T>newBuilder().build());
}
return execute(
ActionType.CHANGE_UPDATE,
() -> changeAction.call(updateFactory),
opts,
t -> {
@@ -218,6 +239,7 @@ public class RetryHelper {
/**
* Executes an action with a given retryer.
*
* @param actionType the type of the action
* @param action the action which should be executed and retried on failure
* @param opts options for retrying the action on failure
* @param exceptionPredicate predicate to control on which exception the action should be retried
@@ -225,33 +247,39 @@ public class RetryHelper {
* @throws Throwable any error or exception that made the action fail, callers are expected to
* catch and inspect this Throwable to decide carefully whether it should be re-thrown
*/
private <T> T execute(Action<T> action, Options opts, Predicate<Throwable> exceptionPredicate)
private <T> T execute(
ActionType actionType,
Action<T> action,
Options opts,
Predicate<Throwable> exceptionPredicate)
throws Throwable {
MetricListener listener = new MetricListener();
try {
RetryerBuilder<T> retryerBuilder = createRetryerBuilder(opts, exceptionPredicate);
retryerBuilder.withRetryListener(listener);
return execute(action, retryerBuilder.build());
return execute(actionType, action, retryerBuilder.build());
} finally {
metrics.attemptCounts.record(listener.getAttemptCount());
metrics.attemptCounts.record(actionType, listener.getAttemptCount());
}
}
/**
* Executes an action with a given retryer.
*
* @param actionType the type of the action
* @param action the action which should be executed and retried on failure
* @param retryer the retryer
* @return the result of executing the action
* @throws Throwable any error or exception that made the action fail, callers are expected to
* catch and inspect this Throwable to decide carefully whether it should be re-thrown
*/
private <T> T execute(Action<T> action, Retryer<T> retryer) throws Throwable {
private <T> T execute(ActionType actionType, Action<T> action, Retryer<T> retryer)
throws Throwable {
try {
return retryer.call(() -> action.call());
} catch (ExecutionException | RetryException e) {
if (e instanceof RetryException) {
metrics.timeoutCount.increment();
metrics.timeoutCount.increment(actionType);
}
if (e.getCause() != null) {
throw e.getCause();