RebuildNotedb: batch changes by project

If we batch changes by project before rebuilding them and writing them
to the notedb, we can use a single BatchRefUpdate for all of the
changes in a project, which reduces overhead for writing every change
to the notedb.

Additionally, within the code that rebuilds each change, I synchronized
on the BatchRefUpdate object because it is not thread safe. Since all
changes in a project will be using the same one, we can just
synchronize the function calls that modify that BatchRefUpdate.

Change-Id: I4af196fa720180b0846e9a6e7cc6d9083a75f695
This commit is contained in:
Yacob Yonas
2014-08-08 11:11:47 -07:00
parent 7c51481994
commit 904c08183c
2 changed files with 116 additions and 64 deletions

View File

@@ -17,7 +17,9 @@ package com.google.gerrit.pgm;
import static com.google.gerrit.server.schema.DataSourceProvider.Context.MULTI_USER;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
@@ -29,7 +31,9 @@ import com.google.gerrit.pgm.util.BatchProgramModule;
import com.google.gerrit.pgm.util.SiteProgram;
import com.google.gerrit.pgm.util.ThreadLimiter;
import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.reviewdb.client.Project;
import com.google.gerrit.reviewdb.server.ReviewDb;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.MultiProgressMonitor;
import com.google.gerrit.server.git.MultiProgressMonitor.Task;
import com.google.gerrit.server.git.WorkQueue;
@@ -43,6 +47,9 @@ import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.TypeLiteral;
import org.eclipse.jgit.lib.BatchRefUpdate;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevWalk;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -78,20 +85,41 @@ public class RebuildNotedb extends SiteProgram {
sysManager.start();
ListeningExecutorService executor = newExecutor();
final MultiProgressMonitor mpm =
new MultiProgressMonitor(System.out, "Rebuilding notedb");
final Task doneTask =
mpm.beginSubTask("changes", MultiProgressMonitor.UNKNOWN);
final Task failedTask =
mpm.beginSubTask("failed", MultiProgressMonitor.UNKNOWN);
System.out.println("Rebuilding the notedb");
ChangeRebuilder rebuilder = sysInjector.getInstance(ChangeRebuilder.class);
List<Change> allChanges = getAllChanges();
final List<ListenableFuture<?>> futures = Lists.newArrayList();
Multimap<Project.NameKey, Change> changesByProject = getChangesByProject();
final AtomicBoolean ok = new AtomicBoolean(true);
Stopwatch sw = Stopwatch.createStarted();
for (final Change c : allChanges) {
final ListenableFuture<?> future = rebuilder.rebuildAsync(c, executor);
GitRepositoryManager repoManager =
sysInjector.getInstance(GitRepositoryManager.class);
for (final Project.NameKey project : changesByProject.keySet()) {
final Repository repo = repoManager.openRepository(project);
try {
final BatchRefUpdate bru = repo.getRefDatabase().newBatchUpdate();
List<ListenableFuture<?>> futures = Lists.newArrayList();
// Here, we truncate the project name to 50 characters to ensure that
// the whole monitor line for a project fits on one line (<80 chars).
int monitorStringMaxLength = 50;
String projectString = project.toString();
String monitorString = (projectString.length() > monitorStringMaxLength)
? projectString.substring(0, monitorStringMaxLength)
: projectString;
if (projectString.length() > monitorString.length()) {
monitorString = monitorString + "...";
}
final MultiProgressMonitor mpm = new MultiProgressMonitor(System.out,
monitorString);
final Task doneTask =
mpm.beginSubTask("done", changesByProject.get(project).size());
final Task failedTask = mpm.beginSubTask("failed",
MultiProgressMonitor.UNKNOWN);
for (final Change c : changesByProject.get(project)) {
final ListenableFuture<?> future =
rebuilder.rebuildAsync(c, executor, bru);
futures.add(future);
future.addListener(new Runnable() {
@Override
@@ -127,22 +155,35 @@ public class RebuildNotedb extends SiteProgram {
}
}, MoreExecutors.sameThreadExecutor());
}
try {
mpm.waitFor(Futures.transform(Futures.successfulAsList(futures),
new AsyncFunction<List<?>, Void>() {
@Override
public ListenableFuture<Void> apply(List<?> input) {
public ListenableFuture<Void> apply(List<?> input)
throws Exception {
Task t = mpm.beginSubTask("update refs",
MultiProgressMonitor.UNKNOWN);
RevWalk walk = new RevWalk(repo);
try {
bru.execute(walk, t);
mpm.end();
return Futures.immediateFuture(null);
} finally {
walk.release();
}
}
}));
} catch (ExecutionException e) {
} catch (Exception e) {
log.error("Error rebuilding notedb", e);
ok.set(false);
break;
} finally {
repo.close();
}
}
double t = sw.elapsed(TimeUnit.MILLISECONDS) / 1000d;
System.out.format("Rebuild %d changes in %.01fs (%.01f/s)\n",
allChanges.size(), t, allChanges.size() / t);
changesByProject.size(), t, changesByProject.size() / t);
return ok.get() ? 0 : 1;
}
@@ -168,17 +209,20 @@ public class RebuildNotedb extends SiteProgram {
}
}
private List<Change> getAllChanges() throws OrmException {
// Memoize all changes to a list so we can close the db connection and allow
private Multimap<Project.NameKey, Change> getChangesByProject()
throws OrmException {
// Memorize all changes so we can close the db connection and allow
// rebuilder threads to use the full connection pool.
// TODO(dborowitz): May need to batch changes, e.g. by project (though note
// that unlike Reindex, we don't think there is an inherent benefit to
// grouping by project), to avoid wasting too much memory here.
SchemaFactory<ReviewDb> schemaFactory = sysInjector.getInstance(Key.get(
new TypeLiteral<SchemaFactory<ReviewDb>>() {}));
ReviewDb db = schemaFactory.open();
Multimap<Project.NameKey, Change> changesByProject =
ArrayListMultimap.create();
try {
return db.changes().all().toList();
for (Change c : db.changes().all()) {
changesByProject.put(c.getProject(), c);
}
return changesByProject;
} finally {
db.close();
}

View File

@@ -72,12 +72,12 @@ public class ChangeRebuilder {
this.updateFactory = updateFactory;
}
public ListenableFuture<?> rebuildAsync(
final Change change, ListeningExecutorService executor) {
public ListenableFuture<?> rebuildAsync(final Change change,
ListeningExecutorService executor, final BatchRefUpdate bru) {
return executor.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
rebuild(change, null);
rebuild(change, bru);
return null;
}
});
@@ -109,7 +109,7 @@ public class ChangeRebuilder {
controlFactory.controlFor(change, user), e.when);
update.setPatchSetId(e.psId);
if (batch == null) {
batch = update.openUpdate();
batch = update.openUpdateInBatch(bru);
}
}
e.apply(update);
@@ -118,9 +118,17 @@ public class ChangeRebuilder {
if (update != null) {
writeToBatch(batch, update);
}
// Since the BatchMetaDataUpdates generated by all ChangeRebuilders on a
// given project are backed by the same BatchRefUpdate, we need to
// synchronize on the BatchRefUpdate. Therefore, since commit on a
// BatchMetaDataUpdate is the only method that modifies a BatchRefUpdate,
// we can just synchronize this call.
synchronized (bru) {
batch.commit();
}
}
}
private void writeToBatch(BatchMetaDataUpdate batch, ChangeUpdate update)
throws IOException {