Merge "WalkSorter: Try harder to topo-sort"

This commit is contained in:
Dave Borowitz
2015-06-10 20:08:34 +00:00
committed by Gerrit Code Review
2 changed files with 84 additions and 15 deletions

View File

@@ -34,7 +34,9 @@ import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -123,10 +125,6 @@ class WalkSorter {
private List<PatchSetData> sortProject(Project.NameKey project,
Collection<ChangeData> in) throws OrmException, IOException {
// Use default sort; topo sorting is way too slow, as it slurps all
// interesting commits, and we don't mark anything uninteresting. If we
// really need to topo sort, we would need to identify the rootmost commits
// and mark their parents uninteresting, but that's nontrivial in itself.
try (Repository repo = repoManager.openRepository(project);
RevWalk rw = new RevWalk(repo)) {
rw.setRetainBody(retainBody);
@@ -137,14 +135,37 @@ class WalkSorter {
return ImmutableList.of(byCommit.values().iterator().next());
}
Collection<RevCommit> commits = byCommit.keySet();
List<RevCommit> mergeBases = findMergeBases(rw, commits);
rw.reset();
rw.setRevFilter(RevFilter.ALL);
// Mark parents of all merge bases as uninteresting, to limit the cost of
// the topo sort, which slurps all interesting commits before returning
// anything.
//
// There are two cases where this doesn't work and performance will
// suffer:
// 1. The common ancestor is a root commit; JGit currently does not return
// a root commit as a merge base, even if it is the common ancestor.
// 2. There is a merge of two unrelated branches of history; these simply
// have no merge base, so this technique of finding uninteresting
// ancestors doesn't work.
//
// The performance impact of these cases in practice is hopefully low;
// this does handle the common case of a single chain of commits.
for (RevCommit mergeBase : mergeBases) {
for (RevCommit p : mergeBase.getParents()) {
rw.markUninteresting(p);
}
}
rw.sort(RevSort.TOPO);
// Walk from all patch set SHA-1s, and terminate as soon as we've found
// everything we're looking for. This is equivalent to just sorting the
// list of commits by the RevWalk's configured order.
for (RevCommit c : byCommit.keySet()) {
rw.markStart(c);
}
int expected = byCommit.keySet().size();
markStart(rw, commits);
int expected = commits.size();
int found = 0;
RevCommit c;
List<PatchSetData> result = new ArrayList<>(expected);
@@ -193,6 +214,25 @@ class WalkSorter {
return includePatchSets.isEmpty() || includePatchSets.contains(ps.getId());
}
private static void markStart(RevWalk rw, Iterable<RevCommit> commits)
throws IOException {
for (RevCommit c : commits) {
rw.markStart(c);
}
}
private static List<RevCommit> findMergeBases(RevWalk rw,
Iterable<RevCommit> commits) throws IOException {
rw.setRevFilter(RevFilter.MERGE_BASE);
markStart(rw, commits);
List<RevCommit> result = new ArrayList<>();
RevCommit c;
while ((c = rw.next()) != null) {
result.add(c);
}
return result;
}
@AutoValue
static abstract class PatchSetData {
@VisibleForTesting

View File

@@ -93,6 +93,39 @@ public class WalkSorterTest {
@Test
public void seriesOfChangesAtSameTimestamp() throws Exception {
TestRepository<Repo> p = newRepo("p");
RevCommit c0 = p.commit().tick(0).create();
RevCommit c1 = p.commit().tick(0).parent(c0).create();
RevCommit c2 = p.commit().tick(0).parent(c1).create();
RevCommit c3 = p.commit().tick(0).parent(c2).create();
RevCommit c4 = p.commit().tick(0).parent(c3).create();
RevWalk rw = p.getRevWalk();
rw.parseCommit(c1);
assertThat(rw.parseCommit(c2).getCommitTime())
.isEqualTo(c1.getCommitTime());
assertThat(rw.parseCommit(c3).getCommitTime())
.isEqualTo(c1.getCommitTime());
assertThat(rw.parseCommit(c4).getCommitTime())
.isEqualTo(c1.getCommitTime());
ChangeData cd1 = newChange(p, c1);
ChangeData cd2 = newChange(p, c2);
ChangeData cd3 = newChange(p, c3);
ChangeData cd4 = newChange(p, c4);
List<ChangeData> changes = ImmutableList.of(cd1, cd2, cd3, cd4);
WalkSorter sorter = new WalkSorter(repoManager);
assertSorted(sorter, changes, ImmutableList.of(
patchSetData(cd4, c4),
patchSetData(cd3, c3),
patchSetData(cd2, c2),
patchSetData(cd1, c1)));
}
@Test
public void seriesOfChangesAtSameTimestampWithRootCommit() throws Exception {
TestRepository<Repo> p = newRepo("p");
RevCommit c1 = p.commit().tick(0).create();
RevCommit c2 = p.commit().tick(0).parent(c1).create();
@@ -115,16 +148,12 @@ public class WalkSorterTest {
List<ChangeData> changes = ImmutableList.of(cd1, cd2, cd3, cd4);
WalkSorter sorter = new WalkSorter(repoManager);
List<PatchSetData> expected = ImmutableList.of(
assertSorted(sorter, changes, ImmutableList.of(
patchSetData(cd4, c4),
patchSetData(cd3, c3),
patchSetData(cd2, c2),
patchSetData(cd1, c1));
for (List<ChangeData> list : permutations(changes)) {
// Not inOrder(); default sort can't distinguish between these.
assertThat(sorter.sort(list)).containsExactlyElementsIn(expected);
}
patchSetData(cd1, c1)));
}
@Test