internalChangeQuery: Batch byCommitsOnBranchNotMerged

A long side branch may include very many commits, and secondary index
implementations may not be able to handle arbitrarily large queries.
Even so, we do need to know exactly the subset of commits have
changes associated with them, so this query must be accurate.

Split up this particular query into batches of 100 commits. Because
it is of the form (x AND (a OR b OR ...)), it is logically correct to
split this up to ((x AND a) OR (x AND b) OR ...). (There is a very
slight chance of including duplicates in case of a corrupt sequence
of commits that has multiple commits in the same change ID, but they
are all deduplicated at the call site anyway.)

Change-Id: I627ed1bf2de806491e8c859f9576af6666f06836
This commit is contained in:
Dave Borowitz
2015-07-22 08:57:50 -07:00
parent 2065c9b3b4
commit 16ab894193
3 changed files with 71 additions and 10 deletions

View File

@@ -138,7 +138,7 @@ public class MergeSuperSet {
if (!hashes.isEmpty()) {
// Merged changes are ok to exclude
List<ChangeData> destChanges = queryProvider.get()
Iterable<ChangeData> destChanges = queryProvider.get()
.byCommitsOnBranchNotMerged(cd.change().getDest(), hashes);
for (ChangeData chd : destChanges) {
@@ -193,4 +193,4 @@ public class MergeSuperSet {
logError(msg);
throw new OrmException(msg);
}
}
}

View File

@@ -15,10 +15,14 @@
package com.google.gerrit.server.query.change;
import static com.google.gerrit.server.query.Predicate.and;
import static com.google.gerrit.server.query.Predicate.or;
import static com.google.gerrit.server.query.Predicate.not;
import static com.google.gerrit.server.query.Predicate.or;
import static com.google.gerrit.server.query.change.ChangeStatusPredicate.open;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Iterables;
import com.google.gerrit.common.Nullable;
import com.google.gerrit.reviewdb.client.Branch;
import com.google.gerrit.reviewdb.client.Change;
@@ -116,17 +120,38 @@ public class InternalChangeQuery {
open()));
}
public List<ChangeData> byCommitsOnBranchNotMerged(Branch.NameKey branch,
public Iterable<ChangeData> byCommitsOnBranchNotMerged(Branch.NameKey branch,
List<String> hashes) throws OrmException {
List<Predicate<ChangeData>> commits = new ArrayList<>();
return byCommitsOnBranchNotMerged(branch, hashes, 100);
}
@VisibleForTesting
Iterable<ChangeData> byCommitsOnBranchNotMerged(Branch.NameKey branch,
List<String> hashes, int batchSize) throws OrmException {
List<Predicate<ChangeData>> commits = new ArrayList<>(hashes.size());
for (String s : hashes) {
commits.add(commit(AbbreviatedObjectId.fromString(s)));
}
return query(and(
ref(branch),
project(branch.getParentKey()),
not(status(Change.Status.MERGED)),
or(commits)));
int numBatches = (hashes.size() / batchSize) + 1;
List<Predicate<ChangeData>> queries = new ArrayList<>(numBatches);
for (List<Predicate<ChangeData>> batch : Iterables.partition(commits, batchSize)) {
queries.add(and(
ref(branch),
project(branch.getParentKey()),
not(status(Change.Status.MERGED)),
or(batch)));
}
try {
return FluentIterable.from(qp.queryChanges(queries))
.transformAndConcat(new Function<QueryResult, List<ChangeData>>() {
@Override
public List<ChangeData> apply(QueryResult in) {
return in.changes();
}
});
} catch (QueryParseException e) {
throw new OrmException(e);
}
}
public List<ChangeData> byProjectOpen(Project.NameKey project)