Speed up patch set against patch set diffs
The diff computation between patch sets was especially slow when many files (e.g. >20k) were modified by the commits between the parents. The reasons why this was slow are: - The computation of the size and the size delta is expensive for many files. - The computation of the file header and the edits is expensive for many files (whereas DiffFormatter#scan seems to be very cheap). This fix avoids to compute the size and size delta for the diff between the parents. In addition, it sorts out all files which were clearly not touched by the patch sets. The file headers and edits will only be computed for the remaining (much smaller) set of files. As the result should be the same as before, we need not invalidate the cache. The code of this fix is far from being clean in any way. It's a quick hack to fix the immediate performance issue. I promise to clean up the whole implementation in the future. Change-Id: I209cf8150f565946a6cad05f67816e8424737a55
This commit is contained in:
		@@ -34,6 +34,8 @@ import org.eclipse.jgit.lib.ObjectId;
 | 
			
		||||
public class PatchListKey implements Serializable {
 | 
			
		||||
  public static final long serialVersionUID = 28L;
 | 
			
		||||
 | 
			
		||||
  // TODO(aliceks): Get rid of this enum and the parameter in the PatchListKey as we only use one of
 | 
			
		||||
  // its values.
 | 
			
		||||
  public enum Algorithm {
 | 
			
		||||
    PURE_TREE_DIFF,
 | 
			
		||||
    OPTIMIZED_DIFF
 | 
			
		||||
@@ -69,11 +71,6 @@ public class PatchListKey implements Serializable {
 | 
			
		||||
    return new PatchListKey(otherCommitId, newId, whitespace, Algorithm.OPTIMIZED_DIFF);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  public static PatchListKey againstCommitWithPureTreeDiff(
 | 
			
		||||
      AnyObjectId otherCommitId, AnyObjectId newId, Whitespace whitespace) {
 | 
			
		||||
    return new PatchListKey(otherCommitId, newId, whitespace, Algorithm.PURE_TREE_DIFF);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * Old patch-set ID
 | 
			
		||||
   *
 | 
			
		||||
 
 | 
			
		||||
@@ -16,10 +16,12 @@
 | 
			
		||||
package com.google.gerrit.server.patch;
 | 
			
		||||
 | 
			
		||||
import static com.google.common.base.Preconditions.checkArgument;
 | 
			
		||||
import static com.google.common.collect.ImmutableList.toImmutableList;
 | 
			
		||||
import static java.nio.charset.StandardCharsets.UTF_8;
 | 
			
		||||
import static java.util.stream.Collectors.toSet;
 | 
			
		||||
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
 | 
			
		||||
 | 
			
		||||
import com.google.auto.value.AutoValue;
 | 
			
		||||
import com.google.common.base.Throwables;
 | 
			
		||||
import com.google.common.collect.ImmutableList;
 | 
			
		||||
import com.google.common.collect.ImmutableMultimap;
 | 
			
		||||
@@ -38,6 +40,7 @@ import com.google.inject.Inject;
 | 
			
		||||
import com.google.inject.assistedinject.Assisted;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.HashSet;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.Optional;
 | 
			
		||||
import java.util.Set;
 | 
			
		||||
@@ -183,6 +186,14 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
      df.setDetectRenames(true);
 | 
			
		||||
      List<DiffEntry> diffEntries = df.scan(aTree, bTree);
 | 
			
		||||
 | 
			
		||||
      Multimap<String, ContextAwareEdit> editsDueToRebasePerFilePath = ImmutableMultimap.of();
 | 
			
		||||
      if (key.getAlgorithm() == PatchListKey.Algorithm.OPTIMIZED_DIFF) {
 | 
			
		||||
        EditsDueToRebaseResult editsDueToRebaseResult =
 | 
			
		||||
            determineEditsDueToRebase(aCommit, b, diffEntries, df, rw);
 | 
			
		||||
        diffEntries = editsDueToRebaseResult.getRelevantOriginalDiffEntries();
 | 
			
		||||
        editsDueToRebasePerFilePath = editsDueToRebaseResult.getEditsDueToRebasePerFilePath();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      List<PatchListEntry> entries = new ArrayList<>();
 | 
			
		||||
      entries.add(
 | 
			
		||||
          newCommitMessage(
 | 
			
		||||
@@ -197,10 +208,6 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
                b,
 | 
			
		||||
                comparisonType));
 | 
			
		||||
      }
 | 
			
		||||
      Multimap<String, ContextAwareEdit> editsDueToRebasePerFilePath =
 | 
			
		||||
          key.getAlgorithm() == PatchListKey.Algorithm.OPTIMIZED_DIFF
 | 
			
		||||
              ? getEditsDueToRebasePerFilePath(aCommit, b)
 | 
			
		||||
              : ImmutableMultimap.of();
 | 
			
		||||
      for (DiffEntry diffEntry : diffEntries) {
 | 
			
		||||
        Set<ContextAwareEdit> editsDueToRebase =
 | 
			
		||||
            getEditsDueToRebase(editsDueToRebasePerFilePath, diffEntry);
 | 
			
		||||
@@ -242,32 +249,68 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
   *
 | 
			
		||||
   * @param commitA the commit defining {@code treeA}
 | 
			
		||||
   * @param commitB the commit defining {@code treeB}
 | 
			
		||||
   * @return the edits per file path they modify in {@code treeB}
 | 
			
		||||
   * @param diffEntries the list of {@code DiffEntries} for the diff between {@code commitA} and
 | 
			
		||||
   *     {@code commitB}
 | 
			
		||||
   * @param df the {@code DiffFormatter}
 | 
			
		||||
   * @param rw the current {@code RevWalk}
 | 
			
		||||
   * @return an aggregated result of the computation
 | 
			
		||||
   * @throws PatchListNotAvailableException if the edits can't be identified
 | 
			
		||||
   * @throws IOException if an error occurred while accessing the repository
 | 
			
		||||
   */
 | 
			
		||||
  private Multimap<String, ContextAwareEdit> getEditsDueToRebasePerFilePath(
 | 
			
		||||
      RevCommit commitA, RevCommit commitB) throws PatchListNotAvailableException {
 | 
			
		||||
  private EditsDueToRebaseResult determineEditsDueToRebase(
 | 
			
		||||
      RevCommit commitA,
 | 
			
		||||
      RevCommit commitB,
 | 
			
		||||
      List<DiffEntry> diffEntries,
 | 
			
		||||
      DiffFormatter df,
 | 
			
		||||
      RevWalk rw)
 | 
			
		||||
      throws PatchListNotAvailableException, IOException {
 | 
			
		||||
    if (commitA == null
 | 
			
		||||
        || isRootOrMergeCommit(commitA)
 | 
			
		||||
        || isRootOrMergeCommit(commitB)
 | 
			
		||||
        || areParentChild(commitA, commitB)
 | 
			
		||||
        || haveCommonParent(commitA, commitB)) {
 | 
			
		||||
      return ImmutableMultimap.of();
 | 
			
		||||
      return EditsDueToRebaseResult.create(diffEntries, ImmutableMultimap.of());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    PatchListKey parentDiffKey =
 | 
			
		||||
        PatchListKey.againstCommitWithPureTreeDiff(
 | 
			
		||||
            commitA.getParent(0), commitB.getParent(0), key.getWhitespace());
 | 
			
		||||
    PatchList parentPatchList = patchListCache.get(parentDiffKey, project);
 | 
			
		||||
    PatchListKey oldKey = PatchListKey.againstDefaultBase(key.getOldId(), key.getWhitespace());
 | 
			
		||||
    PatchList oldPatchList = patchListCache.get(oldKey, project);
 | 
			
		||||
    PatchListKey newKey = PatchListKey.againstDefaultBase(key.getNewId(), key.getWhitespace());
 | 
			
		||||
    PatchList newPatchList = patchListCache.get(newKey, project);
 | 
			
		||||
 | 
			
		||||
    EditTransformer editTransformer = new EditTransformer(parentPatchList.getPatches());
 | 
			
		||||
    editTransformer.transformReferencesOfSideA(oldPatchList.getPatches());
 | 
			
		||||
    editTransformer.transformReferencesOfSideB(newPatchList.getPatches());
 | 
			
		||||
    return editTransformer.getEditsPerFilePath();
 | 
			
		||||
    List<PatchListEntry> oldPatches = oldPatchList.getPatches();
 | 
			
		||||
    List<PatchListEntry> newPatches = newPatchList.getPatches();
 | 
			
		||||
    // TODO(aliceks): Have separate but more limited lists for parents and patch sets (but don't
 | 
			
		||||
    // mess up renames/copies).
 | 
			
		||||
    Set<String> touchedFilePaths = new HashSet<>();
 | 
			
		||||
    for (PatchListEntry patchListEntry : oldPatches) {
 | 
			
		||||
      touchedFilePaths.addAll(getTouchedFilePaths(patchListEntry));
 | 
			
		||||
    }
 | 
			
		||||
    for (PatchListEntry patchListEntry : newPatches) {
 | 
			
		||||
      touchedFilePaths.addAll(getTouchedFilePaths(patchListEntry));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    List<DiffEntry> relevantDiffEntries =
 | 
			
		||||
        diffEntries
 | 
			
		||||
            .stream()
 | 
			
		||||
            .filter(diffEntry -> isTouched(touchedFilePaths, diffEntry))
 | 
			
		||||
            .collect(toImmutableList());
 | 
			
		||||
 | 
			
		||||
    RevCommit parentCommitA = commitA.getParent(0);
 | 
			
		||||
    rw.parseBody(parentCommitA);
 | 
			
		||||
    RevCommit parentCommitB = commitB.getParent(0);
 | 
			
		||||
    rw.parseBody(parentCommitB);
 | 
			
		||||
    List<DiffEntry> parentDiffEntries = df.scan(parentCommitA, parentCommitB);
 | 
			
		||||
    // TODO(aliceks): Find a way to not construct a PatchListEntry as it contains many unnecessary
 | 
			
		||||
    // details and we don't fill all of them properly.
 | 
			
		||||
    List<PatchListEntry> parentPatchListEntries =
 | 
			
		||||
        getRelevantPatchListEntries(
 | 
			
		||||
            parentDiffEntries, parentCommitA, parentCommitB, touchedFilePaths, df);
 | 
			
		||||
 | 
			
		||||
    EditTransformer editTransformer = new EditTransformer(parentPatchListEntries);
 | 
			
		||||
    editTransformer.transformReferencesOfSideA(oldPatches);
 | 
			
		||||
    editTransformer.transformReferencesOfSideB(newPatches);
 | 
			
		||||
    return EditsDueToRebaseResult.create(
 | 
			
		||||
        relevantDiffEntries, editTransformer.getEditsPerFilePath());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private static boolean isRootOrMergeCommit(RevCommit commit) {
 | 
			
		||||
@@ -283,6 +326,45 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
    return ObjectId.equals(commitA.getParent(0), commitB.getParent(0));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private static Set<String> getTouchedFilePaths(PatchListEntry patchListEntry) {
 | 
			
		||||
    String oldFilePath = patchListEntry.getOldName();
 | 
			
		||||
    String newFilePath = patchListEntry.getNewName();
 | 
			
		||||
 | 
			
		||||
    return oldFilePath == null
 | 
			
		||||
        ? ImmutableSet.of(newFilePath)
 | 
			
		||||
        : ImmutableSet.of(oldFilePath, newFilePath);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private static boolean isTouched(Set<String> touchedFilePaths, DiffEntry diffEntry) {
 | 
			
		||||
    String oldFilePath = diffEntry.getOldPath();
 | 
			
		||||
    String newFilePath = diffEntry.getNewPath();
 | 
			
		||||
    // One of the above file paths could be /dev/null but we need not explicitly check for this
 | 
			
		||||
    // value as the set of file paths shouldn't contain it.
 | 
			
		||||
    return touchedFilePaths.contains(oldFilePath) || touchedFilePaths.contains(newFilePath);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private List<PatchListEntry> getRelevantPatchListEntries(
 | 
			
		||||
      List<DiffEntry> parentDiffEntries,
 | 
			
		||||
      RevCommit parentCommitA,
 | 
			
		||||
      RevCommit parentCommitB,
 | 
			
		||||
      Set<String> touchedFilePaths,
 | 
			
		||||
      DiffFormatter diffFormatter)
 | 
			
		||||
      throws IOException {
 | 
			
		||||
    List<PatchListEntry> parentPatchListEntries = new ArrayList<>(parentDiffEntries.size());
 | 
			
		||||
    for (DiffEntry parentDiffEntry : parentDiffEntries) {
 | 
			
		||||
      if (!isTouched(touchedFilePaths, parentDiffEntry)) {
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      FileHeader fileHeader = toFileHeader(parentCommitB, diffFormatter, parentDiffEntry);
 | 
			
		||||
      // The code which uses this PatchListEntry doesn't care about the last three parameters. As
 | 
			
		||||
      // they are expensive to compute, we use arbitrary values for them.
 | 
			
		||||
      PatchListEntry patchListEntry =
 | 
			
		||||
          newEntry(parentCommitA.getTree(), fileHeader, ImmutableSet.of(), 0, 0);
 | 
			
		||||
      parentPatchListEntries.add(patchListEntry);
 | 
			
		||||
    }
 | 
			
		||||
    return parentPatchListEntries;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private static Set<ContextAwareEdit> getEditsDueToRebase(
 | 
			
		||||
      Multimap<String, ContextAwareEdit> editsDueToRebasePerFilePath, DiffEntry diffEntry) {
 | 
			
		||||
    if (editsDueToRebasePerFilePath.isEmpty()) {
 | 
			
		||||
@@ -304,7 +386,7 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
      RevTree treeB,
 | 
			
		||||
      Set<ContextAwareEdit> editsDueToRebase)
 | 
			
		||||
      throws IOException {
 | 
			
		||||
    FileHeader fileHeader = toFileHeader(key, diffFormatter, diffEntry);
 | 
			
		||||
    FileHeader fileHeader = toFileHeader(key.getNewId(), diffFormatter, diffEntry);
 | 
			
		||||
    long oldSize = getFileSize(objectReader, diffEntry.getOldMode(), diffEntry.getOldPath(), treeA);
 | 
			
		||||
    long newSize = getFileSize(objectReader, diffEntry.getNewMode(), diffEntry.getNewPath(), treeB);
 | 
			
		||||
    Set<Edit> contentEditsDueToRebase = getContentEdits(editsDueToRebase);
 | 
			
		||||
@@ -356,7 +438,7 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private FileHeader toFileHeader(
 | 
			
		||||
      PatchListKey key, DiffFormatter diffFormatter, DiffEntry diffEntry) throws IOException {
 | 
			
		||||
      ObjectId commitB, DiffFormatter diffFormatter, DiffEntry diffEntry) throws IOException {
 | 
			
		||||
 | 
			
		||||
    Future<FileHeader> result =
 | 
			
		||||
        diffExecutor.submit(
 | 
			
		||||
@@ -375,7 +457,7 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
              + " in project "
 | 
			
		||||
              + project
 | 
			
		||||
              + " on commit "
 | 
			
		||||
              + key.getNewId().name()
 | 
			
		||||
              + commitB.name()
 | 
			
		||||
              + " on path "
 | 
			
		||||
              + diffEntry.getNewPath()
 | 
			
		||||
              + " comparing "
 | 
			
		||||
@@ -505,4 +587,19 @@ public class PatchListLoader implements Callable<PatchList> {
 | 
			
		||||
    ins.flush();
 | 
			
		||||
    return id;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  @AutoValue
 | 
			
		||||
  abstract static class EditsDueToRebaseResult {
 | 
			
		||||
    public static EditsDueToRebaseResult create(
 | 
			
		||||
        List<DiffEntry> relevantDiffEntries,
 | 
			
		||||
        Multimap<String, ContextAwareEdit> editsDueToRebasePerFilePath) {
 | 
			
		||||
      return new AutoValue_PatchListLoader_EditsDueToRebaseResult(
 | 
			
		||||
          relevantDiffEntries, editsDueToRebasePerFilePath);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public abstract List<DiffEntry> getRelevantOriginalDiffEntries();
 | 
			
		||||
 | 
			
		||||
    /** Returns the edits per file path they modify in {@code treeB}. */
 | 
			
		||||
    public abstract Multimap<String, ContextAwareEdit> getEditsDueToRebasePerFilePath();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user