WalkSorter: Try harder to topo-sort

When removing the topo-sort I had forgotten that the reason we cared
about this was that rebases frequently produce series of changes with
identical commit timestamps. Fortunately, in the common case we can
use JGit's merge base detection to find commits to mark uninteresting.

Change-Id: I77a5b606ec7da0a9c6542ffae71383b32cf401e5
This commit is contained in:
Dave Borowitz
2015-06-05 16:58:12 -07:00
parent b01b8721b2
commit a87552e2bc
2 changed files with 84 additions and 15 deletions

View File

@@ -34,7 +34,9 @@ import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -123,10 +125,6 @@ class WalkSorter {
private List<PatchSetData> sortProject(Project.NameKey project,
Collection<ChangeData> in) throws OrmException, IOException {
// Use default sort; topo sorting is way too slow, as it slurps all
// interesting commits, and we don't mark anything uninteresting. If we
// really need to topo sort, we would need to identify the rootmost commits
// and mark their parents uninteresting, but that's nontrivial in itself.
try (Repository repo = repoManager.openRepository(project);
RevWalk rw = new RevWalk(repo)) {
rw.setRetainBody(retainBody);
@@ -137,14 +135,37 @@ class WalkSorter {
return ImmutableList.of(byCommit.values().iterator().next());
}
Collection<RevCommit> commits = byCommit.keySet();
List<RevCommit> mergeBases = findMergeBases(rw, commits);
rw.reset();
rw.setRevFilter(RevFilter.ALL);
// Mark parents of all merge bases as uninteresting, to limit the cost of
// the topo sort, which slurps all interesting commits before returning
// anything.
//
// There are two cases where this doesn't work and performance will
// suffer:
// 1. The common ancestor is a root commit; JGit currently does not return
// a root commit as a merge base, even if it is the common ancestor.
// 2. There is a merge of two unrelated branches of history; these simply
// have no merge base, so this technique of finding uninteresting
// ancestors doesn't work.
//
// The performance impact of these cases in practice is hopefully low;
// this does handle the common case of a single chain of commits.
for (RevCommit mergeBase : mergeBases) {
for (RevCommit p : mergeBase.getParents()) {
rw.markUninteresting(p);
}
}
rw.sort(RevSort.TOPO);
// Walk from all patch set SHA-1s, and terminate as soon as we've found
// everything we're looking for. This is equivalent to just sorting the
// list of commits by the RevWalk's configured order.
for (RevCommit c : byCommit.keySet()) {
rw.markStart(c);
}
int expected = byCommit.keySet().size();
markStart(rw, commits);
int expected = commits.size();
int found = 0;
RevCommit c;
List<PatchSetData> result = new ArrayList<>(expected);
@@ -193,6 +214,25 @@ class WalkSorter {
return includePatchSets.isEmpty() || includePatchSets.contains(ps.getId());
}
private static void markStart(RevWalk rw, Iterable<RevCommit> commits)
throws IOException {
for (RevCommit c : commits) {
rw.markStart(c);
}
}
private static List<RevCommit> findMergeBases(RevWalk rw,
Iterable<RevCommit> commits) throws IOException {
rw.setRevFilter(RevFilter.MERGE_BASE);
markStart(rw, commits);
List<RevCommit> result = new ArrayList<>();
RevCommit c;
while ((c = rw.next()) != null) {
result.add(c);
}
return result;
}
@AutoValue
static abstract class PatchSetData {
@VisibleForTesting

View File

@@ -93,6 +93,39 @@ public class WalkSorterTest {
@Test
public void seriesOfChangesAtSameTimestamp() throws Exception {
TestRepository<Repo> p = newRepo("p");
RevCommit c0 = p.commit().tick(0).create();
RevCommit c1 = p.commit().tick(0).parent(c0).create();
RevCommit c2 = p.commit().tick(0).parent(c1).create();
RevCommit c3 = p.commit().tick(0).parent(c2).create();
RevCommit c4 = p.commit().tick(0).parent(c3).create();
RevWalk rw = p.getRevWalk();
rw.parseCommit(c1);
assertThat(rw.parseCommit(c2).getCommitTime())
.isEqualTo(c1.getCommitTime());
assertThat(rw.parseCommit(c3).getCommitTime())
.isEqualTo(c1.getCommitTime());
assertThat(rw.parseCommit(c4).getCommitTime())
.isEqualTo(c1.getCommitTime());
ChangeData cd1 = newChange(p, c1);
ChangeData cd2 = newChange(p, c2);
ChangeData cd3 = newChange(p, c3);
ChangeData cd4 = newChange(p, c4);
List<ChangeData> changes = ImmutableList.of(cd1, cd2, cd3, cd4);
WalkSorter sorter = new WalkSorter(repoManager);
assertSorted(sorter, changes, ImmutableList.of(
patchSetData(cd4, c4),
patchSetData(cd3, c3),
patchSetData(cd2, c2),
patchSetData(cd1, c1)));
}
@Test
public void seriesOfChangesAtSameTimestampWithRootCommit() throws Exception {
TestRepository<Repo> p = newRepo("p");
RevCommit c1 = p.commit().tick(0).create();
RevCommit c2 = p.commit().tick(0).parent(c1).create();
@@ -115,16 +148,12 @@ public class WalkSorterTest {
List<ChangeData> changes = ImmutableList.of(cd1, cd2, cd3, cd4);
WalkSorter sorter = new WalkSorter(repoManager);
List<PatchSetData> expected = ImmutableList.of(
assertSorted(sorter, changes, ImmutableList.of(
patchSetData(cd4, c4),
patchSetData(cd3, c3),
patchSetData(cd2, c2),
patchSetData(cd1, c1));
for (List<ChangeData> list : permutations(changes)) {
// Not inOrder(); default sort can't distinguish between these.
assertThat(sorter.sort(list)).containsExactlyElementsIn(expected);
}
patchSetData(cd1, c1)));
}
@Test