Lucene: Search commit messages using secondary index

Change-Id: Iff2285d0b4934e9871e7635ae3681555f06ea336
This commit is contained in:
Gohulan Balachandran 2013-06-25 14:33:51 -06:00 committed by Edwin Kempin
parent a7c0ba2b50
commit 122ecd55fc
6 changed files with 72 additions and 34 deletions

View File

@ -57,6 +57,7 @@ import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -66,6 +67,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreDoc;
@ -279,6 +281,8 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
return exactQuery(p);
} else if (p.getType() == FieldType.PREFIX) {
return prefixQuery(p);
} else if (p.getType() == FieldType.FULL_TEXT) {
return fullTextQuery(p);
} else if (p instanceof SortKeyPredicate) {
return sortKeyQuery((SortKeyPredicate) p);
} else {
@ -363,6 +367,10 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
return new PrefixQuery(new Term(p.getField().getName(), p.getValue()));
}
/**
 * Builds the Lucene query used for predicates whose field type is
 * {@code FieldType.FULL_TEXT}.
 *
 * <p>The predicate's value is placed, unmodified, into a single {@link Term}
 * on the predicate's field, and that term is matched with a
 * {@link FuzzyQuery} using the query's default settings.
 *
 * <p>NOTE(review): the value is not run through an analyzer here, while
 * full-text fields are added to documents as {@code TextField}s. Presumably
 * multi-word or mixed-case input will not line up with the tokenized terms;
 * confirm against the analyzer configured for the index writer.
 *
 * @param p predicate supplying the field name and the text to search for.
 * @return a fuzzy query over the predicate's field.
 */
private Query fullTextQuery(IndexPredicate<ChangeData> p) {
  String fieldName = p.getField().getName();
  Term searchTerm = new Term(fieldName, p.getValue());
  return new FuzzyQuery(searchTerm);
}
private static class QuerySource implements ChangeDataSource {
// TODO(dborowitz): Push limit down from predicate tree.
private static final int LIMIT = 1000;
@ -484,6 +492,10 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
for (Object value : values) {
doc.add(new StringField(name, (String) value, store));
}
} else if (f.getType() == FieldType.FULL_TEXT) {
for (Object value : values) {
doc.add(new TextField(name, (String) value, store));
}
} else {
throw badFieldType(f.getType());
}

View File

@ -27,6 +27,7 @@ import com.google.gerrit.server.query.change.ChangeQueryBuilder;
import com.google.gerrit.server.query.change.ChangeStatusPredicate;
import com.google.gwtorm.server.OrmException;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.lang.reflect.ParameterizedType;
@ -46,7 +47,7 @@ import java.util.Set;
*/
public class ChangeField {
/** Increment whenever making schema changes. */
public static final int SCHEMA_VERSION = 11;
public static final int SCHEMA_VERSION = 12;
/** Legacy change ID. */
public static final FieldDef<ChangeData, Integer> LEGACY_ID =
@ -234,6 +235,20 @@ public class ChangeField {
+ (accountId != null ? "," + accountId.get() : "");
}
/**
 * Commit message of the current patch set.
 *
 * <p>Registered under {@code ChangeQueryBuilder.FIELD_MESSAGE} with type
 * {@code FieldType.FULL_TEXT}. The value is read from the change via
 * {@code ChangeData.commitMessage}, using the repository manager and database
 * provider carried by the {@code FillArgs}.
 *
 * <p>NOTE(review): the trailing {@code false} is presumably the "stored" flag
 * of {@code FieldDef.Single}; confirm against that constructor.
 */
public static final FieldDef<ChangeData, String> COMMIT_MESSAGE =
    new FieldDef.Single<ChangeData, String>(
        ChangeQueryBuilder.FIELD_MESSAGE, FieldType.FULL_TEXT, false) {
      @Override
      public String get(ChangeData input, FillArgs args) throws OrmException {
        final String message;
        try {
          message = input.commitMessage(args.repoManager, args.db);
        } catch (IOException ioe) {
          // Surface repository read failures through the only checked
          // exception this method declares, keeping the original cause.
          throw new OrmException(ioe);
        }
        return message;
      }
    };
public static final ImmutableMap<String, FieldDef<ChangeData, ?>> ALL;
static {

View File

@ -15,6 +15,7 @@
package com.google.gerrit.server.index;
import com.google.gerrit.reviewdb.server.ReviewDb;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.patch.PatchListCache;
import com.google.gwtorm.server.OrmException;
import com.google.inject.Inject;
@ -57,12 +58,15 @@ public abstract class FieldDef<I, T> {
/**
 * Arguments needed to fill in missing data in the input object.
 *
 * <p>A plain holder: the {@code @Inject}-annotated constructor stores each
 * collaborator in a final, package-visible field for field definitions to
 * read when computing their values.
 */
public static class FillArgs {
  /** Provider of the review database handle. */
  final Provider<ReviewDb> db;

  /** Manager used to reach Git repositories. */
  final GitRepositoryManager repoManager;

  /** Cache of patch lists. */
  final PatchListCache patchListCache;

  @Inject
  FillArgs(
      Provider<ReviewDb> db,
      GitRepositoryManager repoManager,
      PatchListCache patchListCache) {
    this.patchListCache = patchListCache;
    this.repoManager = repoManager;
    this.db = db;
  }
}

View File

@ -39,6 +39,10 @@ public class FieldType<T> {
public static final FieldType<String> PREFIX =
new FieldType<String>("PREFIX");
/**
 * A string field searched using fuzzy-match semantics.
 *
 * <p>The constant is identified by the name {@code "FULL_TEXT"}.
 * NOTE(review): how values of this type are tokenized and matched is
 * presumably decided by each index implementation; confirm there.
 */
public static final FieldType<String> FULL_TEXT =
new FieldType<String>("FULL_TEXT");
private final String name;
private FieldType(String name) {

View File

@ -379,8 +379,12 @@ public class ChangeQueryBuilder extends QueryBuilder<ChangeData> {
}
@Operator
public Predicate<ChangeData> message(String text) {
return new MessagePredicate(args.dbProvider, args.repoManager, text);
public Predicate<ChangeData> message(String text) throws QueryParseException {
if (args.index == ChangeIndex.DISABLED) {
throw error("secondary index must be enabled for message:" + text);
}
return new MessagePredicate(args.dbProvider, args.index, text);
}
@Operator

View File

@ -15,49 +15,43 @@
package com.google.gerrit.server.query.change;
import com.google.gerrit.reviewdb.server.ReviewDb;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.index.ChangeField;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.IndexPredicate;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gwtorm.server.OrmException;
import com.google.inject.Provider;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.revwalk.filter.MessageRevFilter;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
* Predicate to match changes that contain the specified text in the commit
* message body.
*/
public class MessagePredicate extends RevWalkPredicate {
class MessagePredicate extends IndexPredicate<ChangeData> {
private final Provider<ReviewDb> db;
private final ChangeIndex index;
private static final Logger log =
LoggerFactory.getLogger(MessagePredicate.class);
private final RevFilter rFilter;
public MessagePredicate(Provider<ReviewDb> db,
GitRepositoryManager repoManager, String text) {
super(db, repoManager, ChangeQueryBuilder.FIELD_MESSAGE, text);
this.rFilter = MessageRevFilter.create(text);
MessagePredicate(Provider<ReviewDb> db, ChangeIndex index, String value) {
super(ChangeField.COMMIT_MESSAGE, value);
this.db = db;
this.index = index;
}
@SuppressWarnings("unchecked")
@Override
public boolean match(Repository repo, RevWalk rw, Arguments args) {
public boolean match(ChangeData object) throws OrmException {
try {
return rFilter.include(rw, rw.parseCommit(args.objectId));
} catch (MissingObjectException e) {
log.error(args.projectName.get() + "\" commit does not exist.", e);
} catch (IncorrectObjectTypeException e) {
log.error(args.projectName.get() + "\" revision is not a commit.", e);
} catch (IOException e) {
log.error("Could not search for commit message in \"" +
args.projectName.get() + "\" repository.", e);
for (ChangeData cData : index.getSource(
Predicate.and(new LegacyChangeIdPredicate(db, object.getId()), this))
.read()) {
if (cData.getId().equals(object.getId())) {
return true;
}
}
} catch (QueryParseException e) {
throw new OrmException(e);
}
return false;
}
@ -65,4 +59,9 @@ public class MessagePredicate extends RevWalkPredicate {
public int getCost() {
// Fixed cost estimate of 1 for this predicate.
// NOTE(review): presumably consumed by the query planner to order
// predicates; confirm the scale against the other predicates' costs.
return 1;
}
/**
 * Always reports {@code true} for this predicate.
 *
 * <p>NOTE(review): presumably tells the query machinery that this predicate
 * can only be answered through the secondary index; confirm against the
 * contract of the overridden method.
 *
 * @return {@code true}, unconditionally.
 */
@Override
public boolean isIndexOnly() {
return true;
}
}