Add separate field for exact commit search

Index implementations (including both Lucene and the googlesource.com
index) cannot really implement prefix search efficiently: they need
to iterate over all tokens in the inverted index with a given prefix.
When we have a full commit SHA-1, this indirection is wasteful,
particularly in cases like byCommitsOnBranchNotMerged where there
might be hundreds or thousands of terms.

Add a separate field that indexes the full commit SHA-1 to make these
specific queries more efficient. There is no need for a separate
query operator; we can just choose the predicate based on the length
of the input string.

Change-Id: Ie26d58ab82e9efaf2c32d352cf285a2a1d59a8d8
This commit is contained in:
Dave Borowitz
2015-07-22 10:52:39 -07:00
parent 20363160f7
commit 5c79e7cb49
6 changed files with 121 additions and 20 deletions

View File

@@ -303,22 +303,37 @@ public class ChangeField {
}
};
/** Commit id of any PatchSet on the change */
/** Commit ID of any patch set on the change, using prefix match. */
public static final FieldDef<ChangeData, Iterable<String>> COMMIT =
new FieldDef.Repeatable<ChangeData, String>(
ChangeQueryBuilder.FIELD_COMMIT, FieldType.PREFIX, false) {
@Override
public Iterable<String> get(ChangeData input, FillArgs args)
throws OrmException {
return getRevisions(input);
}
};
/** Commit ID of any patch set on the change, using exact match. */
public static final FieldDef<ChangeData, Iterable<String>> EXACT_COMMIT =
new FieldDef.Repeatable<ChangeData, String>(
"exactcommit", FieldType.EXACT, false) {
@Override
public Iterable<String> get(ChangeData input, FillArgs args)
throws OrmException {
return getRevisions(input);
}
};
private static Set<String> getRevisions(ChangeData cd) throws OrmException {
Set<String> revisions = Sets.newHashSet();
for (PatchSet ps : input.patchSets()) {
for (PatchSet ps : cd.patchSets()) {
if (ps.getRevision() != null) {
revisions.add(ps.getRevision().get());
}
}
return revisions;
}
};
/** Tracking id extracted from a footer. */
public static final FieldDef<ChangeData, Iterable<String>> TR =

View File

@@ -241,6 +241,7 @@ public class ChangeSchemas {
ChangeField.EDITBY,
ChangeField.REVIEWEDBY);
@Deprecated
static final Schema<ChangeData> V21 = schema(
ChangeField.LEGACY_ID,
ChangeField.ID,
@@ -273,6 +274,40 @@ public class ChangeSchemas {
ChangeField.EDITBY,
ChangeField.REVIEWEDBY);
static final Schema<ChangeData> V22 = schema(
ChangeField.LEGACY_ID,
ChangeField.ID,
ChangeField.STATUS,
ChangeField.PROJECT,
ChangeField.PROJECTS,
ChangeField.REF,
ChangeField.EXACT_TOPIC,
ChangeField.FUZZY_TOPIC,
ChangeField.UPDATED,
ChangeField.FILE_PART,
ChangeField.PATH,
ChangeField.OWNER,
ChangeField.REVIEWER,
ChangeField.COMMIT,
ChangeField.TR,
ChangeField.LABEL,
ChangeField.COMMIT_MESSAGE,
ChangeField.COMMENT,
ChangeField.CHANGE,
ChangeField.APPROVAL,
ChangeField.MERGEABLE,
ChangeField.ADDED,
ChangeField.DELETED,
ChangeField.DELTA,
ChangeField.HASHTAG,
ChangeField.COMMENTBY,
ChangeField.PATCH_SET,
ChangeField.GROUP,
ChangeField.EDITBY,
ChangeField.REVIEWEDBY,
ChangeField.EXACT_COMMIT);
private static Schema<ChangeData> schema(Collection<FieldDef<ChangeData, ?>> fields) {
return new Schema<>(ImmutableList.copyOf(fields));
}

View File

@@ -63,6 +63,8 @@ import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.errors.RepositoryNotFoundException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import java.io.IOException;
@@ -431,7 +433,10 @@ public class ChangeQueryBuilder extends QueryBuilder<ChangeData> {
@Operator
public Predicate<ChangeData> commit(String id) {
return new CommitPredicate(AbbreviatedObjectId.fromString(id));
if (id.length() == Constants.OBJECT_ID_STRING_LENGTH) {
return new ExactCommitPredicate(ObjectId.fromString(id));
}
return new CommitPrefixPredicate(AbbreviatedObjectId.fromString(id));
}
@Operator

View File

@@ -22,10 +22,10 @@ import com.google.gwtorm.server.OrmException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.ObjectId;
class CommitPredicate extends IndexPredicate<ChangeData> {
class CommitPrefixPredicate extends IndexPredicate<ChangeData> {
private final AbbreviatedObjectId abbrevId;
CommitPredicate(AbbreviatedObjectId id) {
CommitPrefixPredicate(AbbreviatedObjectId id) {
super(ChangeField.COMMIT, id.name());
this.abbrevId = id;
}

View File

@@ -0,0 +1,51 @@
// Copyright (C) 2015 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.query.change;
import com.google.gerrit.reviewdb.client.PatchSet;
import com.google.gerrit.server.index.ChangeField;
import com.google.gerrit.server.index.IndexPredicate;
import com.google.gwtorm.server.OrmException;
import org.eclipse.jgit.lib.ObjectId;
import java.util.Objects;
class ExactCommitPredicate extends IndexPredicate<ChangeData> {
private final ObjectId id;
ExactCommitPredicate(ObjectId id) {
super(ChangeField.COMMIT, id.name());
this.id = id;
}
@Override
public boolean match(final ChangeData object) throws OrmException {
String idStr = id.name();
for (PatchSet p : object.patchSets()) {
if (p.getRevision() != null && p.getRevision().get() != null) {
if (Objects.equals(p.getRevision().get(), idStr)) {
return true;
}
}
}
return false;
}
@Override
public int getCost() {
return 1;
}
}

View File

@@ -35,7 +35,6 @@ import com.google.gerrit.server.query.QueryParseException;
import com.google.gwtorm.server.OrmException;
import com.google.inject.Inject;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.ObjectId;
import java.util.ArrayList;
@@ -67,8 +66,8 @@ public class InternalChangeQuery {
return new ChangeStatusPredicate(status);
}
private static Predicate<ChangeData> commit(AbbreviatedObjectId id) {
return new CommitPredicate(id);
private static Predicate<ChangeData> commit(ObjectId id) {
return new ExactCommitPredicate(id);
}
private final QueryProcessor qp;
@@ -130,7 +129,7 @@ public class InternalChangeQuery {
List<String> hashes, int batchSize) throws OrmException {
List<Predicate<ChangeData>> commits = new ArrayList<>(hashes.size());
for (String s : hashes) {
commits.add(commit(AbbreviatedObjectId.fromString(s)));
commits.add(commit(ObjectId.fromString(s)));
}
int numBatches = (hashes.size() / batchSize) + 1;
List<Predicate<ChangeData>> queries = new ArrayList<>(numBatches);
@@ -164,12 +163,8 @@ public class InternalChangeQuery {
return query(and(new ExactTopicPredicate(schema(indexes), topic), open()));
}
public List<ChangeData> byCommitPrefix(String prefix) throws OrmException {
return query(commit(AbbreviatedObjectId.fromString(prefix)));
}
public List<ChangeData> byCommit(ObjectId id) throws OrmException {
return query(commit(AbbreviatedObjectId.fromObjectId(id)));
return query(commit(id));
}
public List<ChangeData> byProjectGroups(Project.NameKey project,