Change indexing: avoid computing current file paths too early

The currentFilePaths used to be computed very early in the reindexing
code. However, we still needed to go through the diff_summary cache
for changed lines. The diff_summary cache contains also the
currentFilePaths so we can simply get these paths from the cache.

This avoids expensive diff computations in JGit.

Change-Id: I30781f6b918ed2ca0cf0137bbfb21d92e0ba13af
This commit is contained in:
Saša Živkov
2016-10-11 09:57:21 +02:00
parent 6fd1b6f1f6
commit 43b50f87fb
3 changed files with 12 additions and 84 deletions

View File

@@ -22,25 +22,20 @@ import static org.eclipse.jgit.lib.RefDatabase.ALL;
import com.google.common.base.Stopwatch; import com.google.common.base.Stopwatch;
import com.google.common.collect.ComparisonChain; import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ListMultimap; import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.MultimapBuilder; import com.google.common.collect.MultimapBuilder;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.gerrit.reviewdb.client.Change; import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.reviewdb.client.Project; import com.google.gerrit.reviewdb.client.Project;
import com.google.gerrit.reviewdb.server.ReviewDb; import com.google.gerrit.reviewdb.server.ReviewDb;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.GitRepositoryManager; import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.MergeUtil;
import com.google.gerrit.server.git.MultiProgressMonitor; import com.google.gerrit.server.git.MultiProgressMonitor;
import com.google.gerrit.server.git.MultiProgressMonitor.Task; import com.google.gerrit.server.git.MultiProgressMonitor.Task;
import com.google.gerrit.server.index.IndexExecutor; import com.google.gerrit.server.index.IndexExecutor;
import com.google.gerrit.server.index.SiteIndexer; import com.google.gerrit.server.index.SiteIndexer;
import com.google.gerrit.server.notedb.ChangeNotes; import com.google.gerrit.server.notedb.ChangeNotes;
import com.google.gerrit.server.patch.AutoMerger;
import com.google.gerrit.server.project.ProjectCache; import com.google.gerrit.server.project.ProjectCache;
import com.google.gerrit.server.query.change.ChangeData; import com.google.gerrit.server.query.change.ChangeData;
import com.google.gwtorm.server.SchemaFactory; import com.google.gwtorm.server.SchemaFactory;
@@ -49,33 +44,24 @@ import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.errors.RepositoryNotFoundException; import org.eclipse.jgit.errors.RepositoryNotFoundException;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.TextProgressMonitor; import org.eclipse.jgit.lib.TextProgressMonitor;
import org.eclipse.jgit.merge.ThreeWayMergeStrategy;
import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.util.io.DisabledOutputStream;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -89,8 +75,6 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
private final ChangeIndexer.Factory indexerFactory; private final ChangeIndexer.Factory indexerFactory;
private final ChangeNotes.Factory notesFactory; private final ChangeNotes.Factory notesFactory;
private final ProjectCache projectCache; private final ProjectCache projectCache;
private final ThreeWayMergeStrategy mergeStrategy;
private final AutoMerger autoMerger;
@Inject @Inject
AllChangesIndexer( AllChangesIndexer(
@@ -100,9 +84,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
@IndexExecutor(BATCH) ListeningExecutorService executor, @IndexExecutor(BATCH) ListeningExecutorService executor,
ChangeIndexer.Factory indexerFactory, ChangeIndexer.Factory indexerFactory,
ChangeNotes.Factory notesFactory, ChangeNotes.Factory notesFactory,
@GerritServerConfig Config config, ProjectCache projectCache) {
ProjectCache projectCache,
AutoMerger autoMerger) {
this.schemaFactory = schemaFactory; this.schemaFactory = schemaFactory;
this.changeDataFactory = changeDataFactory; this.changeDataFactory = changeDataFactory;
this.repoManager = repoManager; this.repoManager = repoManager;
@@ -110,8 +92,6 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
this.indexerFactory = indexerFactory; this.indexerFactory = indexerFactory;
this.notesFactory = notesFactory; this.notesFactory = notesFactory;
this.projectCache = projectCache; this.projectCache = projectCache;
this.mergeStrategy = MergeUtil.getMergeStrategy(config);
this.autoMerger = autoMerger;
} }
private static class ProjectHolder implements Comparable<ProjectHolder> { private static class ProjectHolder implements Comparable<ProjectHolder> {
@@ -241,9 +221,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
byId.put(r.getObjectId(), changeDataFactory.create(db, cn)); byId.put(r.getObjectId(), changeDataFactory.create(db, cn));
} }
} }
new ProjectIndexer( new ProjectIndexer(indexer, byId, repo, done, failed, verboseWriter).call();
indexer, mergeStrategy, autoMerger, byId, repo, done, failed, verboseWriter)
.call();
} catch (RepositoryNotFoundException rnfe) { } catch (RepositoryNotFoundException rnfe) {
log.error(rnfe.getMessage()); log.error(rnfe.getMessage());
} }
@@ -259,8 +237,6 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
private static class ProjectIndexer implements Callable<Void> { private static class ProjectIndexer implements Callable<Void> {
private final ChangeIndexer indexer; private final ChangeIndexer indexer;
private final ThreeWayMergeStrategy mergeStrategy;
private final AutoMerger autoMerger;
private final ListMultimap<ObjectId, ChangeData> byId; private final ListMultimap<ObjectId, ChangeData> byId;
private final ProgressMonitor done; private final ProgressMonitor done;
private final ProgressMonitor failed; private final ProgressMonitor failed;
@@ -269,16 +245,12 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
private ProjectIndexer( private ProjectIndexer(
ChangeIndexer indexer, ChangeIndexer indexer,
ThreeWayMergeStrategy mergeStrategy,
AutoMerger autoMerger,
ListMultimap<ObjectId, ChangeData> changesByCommitId, ListMultimap<ObjectId, ChangeData> changesByCommitId,
Repository repo, Repository repo,
ProgressMonitor done, ProgressMonitor done,
ProgressMonitor failed, ProgressMonitor failed,
PrintWriter verboseWriter) { PrintWriter verboseWriter) {
this.indexer = indexer; this.indexer = indexer;
this.mergeStrategy = mergeStrategy;
this.autoMerger = autoMerger;
this.byId = changesByCommitId; this.byId = changesByCommitId;
this.repo = repo; this.repo = repo;
this.done = done; this.done = done;
@@ -288,8 +260,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
@Override @Override
public Void call() throws Exception { public Void call() throws Exception {
try (ObjectInserter ins = repo.newObjectInserter(); try (RevWalk walk = new RevWalk(repo)) {
RevWalk walk = new RevWalk(ins.newReader())) {
// Walk only refs first to cover as many changes as we can without having // Walk only refs first to cover as many changes as we can without having
// to mark every single change. // to mark every single change.
for (Ref ref : repo.getRefDatabase().getRefs(Constants.R_HEADS).values()) { for (Ref ref : repo.getRefDatabase().getRefs(Constants.R_HEADS).values()) {
@@ -299,39 +270,29 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
} }
} }
// TODO(dborowitz): This is basically pointless; it computes
// currentFilePaths faster than going through PatchListCache, but we
// still need to go through PatchListCache for changedLines.
RevCommit bCommit; RevCommit bCommit;
while ((bCommit = walk.next()) != null && !byId.isEmpty()) { while ((bCommit = walk.next()) != null && !byId.isEmpty()) {
if (byId.containsKey(bCommit)) { if (byId.containsKey(bCommit)) {
getPathsAndIndex(walk, ins, bCommit); index(bCommit);
byId.removeAll(bCommit); byId.removeAll(bCommit);
} }
} }
for (ObjectId id : byId.keySet()) { for (ObjectId id : byId.keySet()) {
getPathsAndIndex(walk, ins, id); index(id);
} }
} }
return null; return null;
} }
private void getPathsAndIndex(RevWalk walk, ObjectInserter ins, ObjectId b) throws Exception { private void index(ObjectId b) throws Exception {
List<ChangeData> cds = Lists.newArrayList(byId.get(b)); List<ChangeData> cds = Lists.newArrayList(byId.get(b));
try (DiffFormatter df = new DiffFormatter(DisabledOutputStream.INSTANCE)) { try {
RevCommit bCommit = walk.parseCommit(b);
RevTree bTree = bCommit.getTree();
RevTree aTree = aFor(bCommit, walk, ins);
df.setRepository(repo);
if (!cds.isEmpty()) { if (!cds.isEmpty()) {
List<String> paths =
(aTree != null) ? getPaths(df.scan(aTree, bTree)) : Collections.<String>emptyList();
Iterator<ChangeData> cdit = cds.iterator(); Iterator<ChangeData> cdit = cds.iterator();
for (ChangeData cd; cdit.hasNext(); cdit.remove()) { for (ChangeData cd; cdit.hasNext(); cdit.remove()) {
cd = cdit.next(); cd = cdit.next();
try { try {
cd.setCurrentFilePaths(paths);
indexer.index(cd); indexer.index(cd);
done.update(1); done.update(1);
verboseWriter.println("Reindexed change " + cd.getId()); verboseWriter.println("Reindexed change " + cd.getId());
@@ -348,43 +309,6 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
} }
} }
private List<String> getPaths(List<DiffEntry> filenames) {
Set<String> paths = Sets.newTreeSet();
for (DiffEntry e : filenames) {
if (e.getOldPath() != null) {
paths.add(e.getOldPath());
}
if (e.getNewPath() != null) {
paths.add(e.getNewPath());
}
}
return ImmutableList.copyOf(paths);
}
private RevTree aFor(RevCommit b, RevWalk walk, ObjectInserter ins) throws IOException {
switch (b.getParentCount()) {
case 0:
return walk.parseTree(emptyTree());
case 1:
RevCommit a = b.getParent(0);
walk.parseBody(a);
return walk.parseTree(a.getTree());
case 2:
RevCommit am = autoMerger.merge(repo, walk, ins, b, mergeStrategy);
return am == null ? null : am.getTree();
default:
return null;
}
}
private ObjectId emptyTree() throws IOException {
try (ObjectInserter oi = repo.newObjectInserter()) {
ObjectId id = oi.insert(Constants.OBJ_TREE, new byte[] {});
oi.flush();
return id;
}
}
private void fail(String error, boolean failed, Exception e) { private void fail(String error, boolean failed, Exception e) {
if (failed) { if (failed) {
this.failed.update(1); this.failed.update(1);

View File

@@ -32,4 +32,7 @@ public interface PatchListCache {
DiffSummary getDiffSummary(Change change, PatchSet patchSet) DiffSummary getDiffSummary(Change change, PatchSet patchSet)
throws PatchListNotAvailableException; throws PatchListNotAvailableException;
DiffSummary getDiffSummary(DiffSummaryKey key, Project.NameKey project)
throws PatchListNotAvailableException;
} }

View File

@@ -163,7 +163,8 @@ public class PatchListCacheImpl implements PatchListCache {
DiffSummaryKey.fromPatchListKey(PatchListKey.againstDefaultBase(b, ws)), project); DiffSummaryKey.fromPatchListKey(PatchListKey.againstDefaultBase(b, ws)), project);
} }
private DiffSummary getDiffSummary(DiffSummaryKey key, Project.NameKey project) @Override
public DiffSummary getDiffSummary(DiffSummaryKey key, Project.NameKey project)
throws PatchListNotAvailableException { throws PatchListNotAvailableException {
try { try {
return diffSummaryCache.get(key, diffSummaryLoaderFactory.create(key, project)); return diffSummaryCache.get(key, diffSummaryLoaderFactory.create(key, project));