Add secondary index implementation using SolrCloud

SolrCloud can be used instead of Lucene by adding "type = SOLR"
under [index] and "url = <zookeeper-url>" under [index "solr"]
in gerrit.config.

Change-Id: I0ff8579c5e23c58b16f3605bc20eba4e80fb40fc
This commit is contained in:
Ahaan Ugale
2013-06-12 17:22:19 -06:00
committed by Shawn Pearce
parent 9279b29da7
commit 404c8246bc
23 changed files with 940 additions and 284 deletions

View File

@@ -14,11 +14,10 @@
package com.google.gerrit.lucene;
import static com.google.gerrit.lucene.IndexVersionCheck.SCHEMA_VERSIONS;
import static com.google.gerrit.lucene.IndexVersionCheck.gerritIndexConfig;
import static com.google.gerrit.server.query.change.IndexRewriteImpl.CLOSED_STATUSES;
import static com.google.gerrit.server.query.change.IndexRewriteImpl.OPEN_STATUSES;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableSet;
@@ -35,18 +34,11 @@ import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.FieldDef;
import com.google.gerrit.server.index.FieldDef.FillArgs;
import com.google.gerrit.server.index.FieldType;
import com.google.gerrit.server.index.IndexPredicate;
import com.google.gerrit.server.index.RegexPredicate;
import com.google.gerrit.server.index.TimestampRangePredicate;
import com.google.gerrit.server.query.AndPredicate;
import com.google.gerrit.server.query.NotPredicate;
import com.google.gerrit.server.query.OrPredicate;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.gerrit.server.query.change.ChangeDataSource;
import com.google.gerrit.server.query.change.IndexRewriteImpl;
import com.google.gerrit.server.query.change.SortKeyPredicate;
import com.google.gwtorm.server.OrmException;
import com.google.gwtorm.server.ResultSet;
@@ -62,24 +54,18 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.util.FS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -89,6 +75,7 @@ import java.sql.Timestamp;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
@@ -122,6 +109,7 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
return writerConfig;
}
private final SitePaths sitePaths;
private final FillArgs fillArgs;
private final ExecutorService executor;
private final boolean readOnly;
@@ -131,6 +119,7 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
LuceneChangeIndex(Config cfg, SitePaths sitePaths,
ListeningScheduledExecutorService executor, FillArgs fillArgs,
boolean readOnly) throws IOException {
this.sitePaths = sitePaths;
this.fillArgs = fillArgs;
this.executor = executor;
this.readOnly = readOnly;
@@ -167,7 +156,7 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
@SuppressWarnings("unchecked")
@Override
public ListenableFuture<Void> insert(ChangeData cd) throws IOException {
Term id = idTerm(cd);
Term id = QueryBuilder.idTerm(cd);
Document doc = toDocument(cd);
if (readOnly) {
return Futures.immediateFuture(null);
@@ -187,7 +176,7 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
@SuppressWarnings("unchecked")
@Override
public ListenableFuture<Void> replace(ChangeData cd) throws IOException {
Term id = idTerm(cd);
Term id = QueryBuilder.idTerm(cd);
Document doc = toDocument(cd);
if (readOnly) {
return Futures.immediateFuture(null);
@@ -206,7 +195,7 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
@SuppressWarnings("unchecked")
@Override
public ListenableFuture<Void> delete(ChangeData cd) throws IOException {
Term id = idTerm(cd);
Term id = QueryBuilder.idTerm(cd);
if (readOnly) {
return Futures.immediateFuture(null);
}
@@ -226,6 +215,11 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
});
}
/**
 * Removes every document from this change index.
 *
 * <p>Both sub-indexes are cleared: {@code getSource} may consult
 * {@code closedIndex} as well as {@code openIndex}, so clearing only the
 * open one would leave stale documents behind.
 *
 * @throws IOException if either sub-index fails to delete its documents.
 */
@Override
public void deleteAll() throws IOException {
  openIndex.deleteAll();
  closedIndex.deleteAll();
}
@Override
public ChangeDataSource getSource(Predicate<ChangeData> p)
throws QueryParseException {
@@ -237,138 +231,7 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
if (!Sets.intersection(statuses, CLOSED_STATUSES).isEmpty()) {
indexes.add(closedIndex);
}
return new QuerySource(indexes, toQuery(p));
}
private Term idTerm(ChangeData cd) {
return intTerm(ID_FIELD, cd.getId().get());
}
private Query toQuery(Predicate<ChangeData> p) throws QueryParseException {
if (p.getClass() == AndPredicate.class) {
return booleanQuery(p, MUST);
} else if (p.getClass() == OrPredicate.class) {
return booleanQuery(p, SHOULD);
} else if (p.getClass() == NotPredicate.class) {
if (p.getChild(0) instanceof TimestampRangePredicate) {
return notTimestampQuery(
(TimestampRangePredicate<ChangeData>) p.getChild(0));
}
return booleanQuery(p, MUST_NOT);
} else if (p instanceof IndexPredicate) {
return fieldQuery((IndexPredicate<ChangeData>) p);
} else {
throw new QueryParseException("Cannot convert to index predicate: " + p);
}
}
private Query booleanQuery(Predicate<ChangeData> p, BooleanClause.Occur o)
throws QueryParseException {
BooleanQuery q = new BooleanQuery();
for (int i = 0; i < p.getChildCount(); i++) {
q.add(toQuery(p.getChild(i)), o);
}
return q;
}
private Query fieldQuery(IndexPredicate<ChangeData> p)
throws QueryParseException {
if (p.getType() == FieldType.INTEGER) {
return intQuery(p);
} else if (p.getType() == FieldType.TIMESTAMP) {
return timestampQuery(p);
} else if (p.getType() == FieldType.EXACT) {
return exactQuery(p);
} else if (p.getType() == FieldType.PREFIX) {
return prefixQuery(p);
} else if (p.getType() == FieldType.FULL_TEXT) {
return fullTextQuery(p);
} else if (p instanceof SortKeyPredicate) {
return sortKeyQuery((SortKeyPredicate) p);
} else {
throw badFieldType(p.getType());
}
}
private Term intTerm(String name, int value) {
BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
NumericUtils.intToPrefixCodedBytes(value, 0, bytes);
return new Term(name, bytes);
}
private Query intQuery(IndexPredicate<ChangeData> p)
throws QueryParseException {
int value;
try {
// Can't use IntPredicate because it and IndexPredicate are different
// subclasses of OperatorPredicate.
value = Integer.valueOf(p.getValue());
} catch (IllegalArgumentException e) {
throw new QueryParseException("not an integer: " + p.getValue());
}
return new TermQuery(intTerm(p.getField().getName(), value));
}
private static Query sortKeyQuery(SortKeyPredicate p) {
return NumericRangeQuery.newLongRange(
p.getField().getName(),
p.getMinValue(),
p.getMaxValue(),
true, true);
}
private static Query timestampQuery(IndexPredicate<ChangeData> p)
throws QueryParseException {
if (p instanceof TimestampRangePredicate) {
TimestampRangePredicate<ChangeData> r =
(TimestampRangePredicate<ChangeData>) p;
return NumericRangeQuery.newIntRange(
r.getField().getName(),
toIndexTime(r.getMinTimestamp()),
toIndexTime(r.getMaxTimestamp()),
true, true);
}
throw new QueryParseException("not a timestamp: " + p);
}
private static Query notTimestampQuery(TimestampRangePredicate<ChangeData> r)
throws QueryParseException {
if (r.getMinTimestamp().getTime() == 0) {
return NumericRangeQuery.newIntRange(
r.getField().getName(),
toIndexTime(r.getMaxTimestamp()),
null,
true, true);
}
throw new QueryParseException("cannot negate: " + r);
}
private Query exactQuery(IndexPredicate<ChangeData> p) {
if (p instanceof RegexPredicate<?>) {
return regexQuery(p);
} else {
return new TermQuery(new Term(p.getField().getName(), p.getValue()));
}
}
private Query regexQuery(IndexPredicate<ChangeData> p) {
String re = p.getValue();
if (re.startsWith("^")) {
re = re.substring(1);
}
if (re.endsWith("$") && !re.endsWith("\\$")) {
re = re.substring(0, re.length() - 1);
}
return new RegexpQuery(new Term(p.getField().getName(), re));
}
private Query prefixQuery(IndexPredicate<ChangeData> p) {
return new PrefixQuery(new Term(p.getField().getName(), p.getValue()));
}
private Query fullTextQuery(IndexPredicate<ChangeData> p) {
return new FuzzyQuery(new Term(p.getField().getName(), p.getValue()));
return new QuerySource(indexes, QueryBuilder.toQuery(p));
}
private static class QuerySource implements ChangeDataSource {
@@ -485,7 +348,8 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
}
} else if (f.getType() == FieldType.TIMESTAMP) {
for (Object v : values) {
doc.add(new IntField(name, toIndexTime((Timestamp) v), store));
int t = QueryBuilder.toIndexTime((Timestamp) v);
doc.add(new IntField(name, t, store));
}
} else if (f.getType() == FieldType.EXACT
|| f.getType() == FieldType.PREFIX) {
@@ -497,19 +361,24 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
doc.add(new TextField(name, (String) value, store));
}
} else {
throw badFieldType(f.getType());
throw QueryBuilder.badFieldType(f.getType());
}
}
private static int toIndexTime(Timestamp ts) {
return (int) (ts.getTime() / 60000);
}
private static Field.Store store(FieldDef<?, ?> f) {
return f.isStored() ? Field.Store.YES : Field.Store.NO;
}
private static IllegalArgumentException badFieldType(FieldType<?> t) {
return new IllegalArgumentException("unknown index field type " + t);
/**
 * Writes the index metadata file once indexing has finished.
 *
 * <p>For every entry of {@code SCHEMA_VERSIONS} an
 * {@code index.<name>.schemaVersion} value is stored, followed by the
 * {@code lucene.version} value, and the file is then saved.
 *
 * @throws IOException if the config file cannot be written.
 * @throws ConfigInvalidException declared by the overridden method.
 */
@Override
public void finishIndex() throws IOException,
    ConfigInvalidException {
  FileBasedConfig indexConfig =
      new FileBasedConfig(gerritIndexConfig(sitePaths), FS.detect());

  for (Map.Entry<String, Integer> version : SCHEMA_VERSIONS.entrySet()) {
    String indexName = version.getKey();
    Integer schemaVersion = version.getValue();
    indexConfig.setInt("index", indexName, "schemaVersion", schemaVersion);
  }

  indexConfig.setEnum("lucene", null, "version", LUCENE_VERSION);
  indexConfig.save();
}
}

View File

@@ -0,0 +1,192 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.lucene;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;

import com.google.gerrit.server.index.ChangeField;
import com.google.gerrit.server.index.FieldType;
import com.google.gerrit.server.index.IndexPredicate;
import com.google.gerrit.server.index.RegexPredicate;
import com.google.gerrit.server.index.TimestampRangePredicate;
import com.google.gerrit.server.query.AndPredicate;
import com.google.gerrit.server.query.NotPredicate;
import com.google.gerrit.server.query.OrPredicate;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.gerrit.server.query.change.SortKeyPredicate;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;

import java.sql.Timestamp;
/**
 * Static helpers that translate change query predicates into Lucene
 * {@link Query} objects, plus a few conversions needed when writing the
 * same fields into an index document.
 */
public final class QueryBuilder {
  private static final String ID_FIELD = ChangeField.LEGACY_ID.getName();

  /**
   * Builds the term identifying a change's document by its legacy numeric id.
   *
   * @param cd change whose id is encoded.
   * @return term on the legacy id field.
   */
  public static Term idTerm(ChangeData cd) {
    return intTerm(ID_FIELD, cd.getId().get());
  }

  /**
   * Converts a predicate tree into a Lucene query.
   *
   * <p>Exactly {@link AndPredicate}, {@link OrPredicate} and
   * {@link NotPredicate} (compared by class, not {@code instanceof}) are
   * handled as boolean operators; any {@link IndexPredicate} becomes a field
   * query.
   *
   * @param p predicate to convert.
   * @return the equivalent Lucene query.
   * @throws QueryParseException if the predicate, or one of its children,
   *         cannot be expressed against the index.
   */
  public static Query toQuery(Predicate<ChangeData> p)
      throws QueryParseException {
    if (p.getClass() == AndPredicate.class) {
      return booleanQuery(p, MUST);
    } else if (p.getClass() == OrPredicate.class) {
      return booleanQuery(p, SHOULD);
    } else if (p.getClass() == NotPredicate.class) {
      return notQuery(p);
    } else if (p instanceof IndexPredicate) {
      return fieldQuery((IndexPredicate<ChangeData>) p);
    } else {
      throw new QueryParseException("Cannot convert to index predicate: " + p);
    }
  }

  /** Combines the converted children of {@code p} using occurrence {@code o}. */
  private static Query booleanQuery(Predicate<ChangeData> p,
      BooleanClause.Occur o) throws QueryParseException {
    BooleanQuery q = new BooleanQuery();
    for (int i = 0; i < p.getChildCount(); i++) {
      q.add(toQuery(p.getChild(i)), o);
    }
    return q;
  }

  /**
   * Converts a negation.
   *
   * <p>A negated timestamp range is rewritten into a positive range. Every
   * other negation is expressed as "all documents minus the matches": a
   * Lucene {@link BooleanQuery} holding only {@code MUST_NOT} clauses
   * matches nothing, so a match-all {@code MUST} clause is added first.
   */
  private static Query notQuery(Predicate<ChangeData> p)
      throws QueryParseException {
    if (p.getChild(0) instanceof TimestampRangePredicate) {
      return notTimestampQuery(
          (TimestampRangePredicate<ChangeData>) p.getChild(0));
    }
    BooleanQuery q = new BooleanQuery();
    q.add(new MatchAllDocsQuery(), MUST);
    for (int i = 0; i < p.getChildCount(); i++) {
      q.add(toQuery(p.getChild(i)), MUST_NOT);
    }
    return q;
  }

  /**
   * Dispatches on the field type of {@code p}. The sort key check runs only
   * after every field type comparison has failed.
   */
  private static Query fieldQuery(IndexPredicate<ChangeData> p)
      throws QueryParseException {
    if (p.getType() == FieldType.INTEGER) {
      return intQuery(p);
    } else if (p.getType() == FieldType.TIMESTAMP) {
      return timestampQuery(p);
    } else if (p.getType() == FieldType.EXACT) {
      return exactQuery(p);
    } else if (p.getType() == FieldType.PREFIX) {
      return prefixQuery(p);
    } else if (p.getType() == FieldType.FULL_TEXT) {
      return fullTextQuery(p);
    } else if (p instanceof SortKeyPredicate) {
      return sortKeyQuery((SortKeyPredicate) p);
    } else {
      throw badFieldType(p.getType());
    }
  }

  /** Encodes {@code value} as a full-precision (shift 0) prefix-coded term. */
  private static Term intTerm(String name, int value) {
    BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
    NumericUtils.intToPrefixCodedBytes(value, 0, bytes);
    return new Term(name, bytes);
  }

  /** Exact match on an integer field; the predicate value must parse. */
  private static Query intQuery(IndexPredicate<ChangeData> p)
      throws QueryParseException {
    int value;
    try {
      // Can't use IntPredicate because it and IndexPredicate are different
      // subclasses of OperatorPredicate.
      value = Integer.parseInt(p.getValue());
    } catch (IllegalArgumentException e) {
      throw new QueryParseException("not an integer: " + p.getValue());
    }
    return new TermQuery(intTerm(p.getField().getName(), value));
  }

  /** Inclusive long range over the predicate's sort key bounds. */
  private static Query sortKeyQuery(SortKeyPredicate p) {
    return NumericRangeQuery.newLongRange(
        p.getField().getName(),
        p.getMinValue(),
        p.getMaxValue(),
        true, true);
  }

  /**
   * Inclusive range over index-time (minute) values.
   *
   * @throws QueryParseException if {@code p} is not a timestamp range.
   */
  private static Query timestampQuery(IndexPredicate<ChangeData> p)
      throws QueryParseException {
    if (p instanceof TimestampRangePredicate) {
      TimestampRangePredicate<ChangeData> r =
          (TimestampRangePredicate<ChangeData>) p;
      return NumericRangeQuery.newIntRange(
          r.getField().getName(),
          toIndexTime(r.getMinTimestamp()),
          toIndexTime(r.getMaxTimestamp()),
          true, true);
    }
    throw new QueryParseException("not a timestamp: " + p);
  }

  /**
   * Negates a timestamp range that starts at the epoch by querying from its
   * upper bound with no upper limit.
   *
   * @throws QueryParseException if the range has a non-zero lower bound.
   */
  private static Query notTimestampQuery(TimestampRangePredicate<ChangeData> r)
      throws QueryParseException {
    if (r.getMinTimestamp().getTime() == 0) {
      // NOTE(review): the lower bound is inclusive, so a document whose value
      // equals the max matches both the range and its negation -- confirm
      // this is intended.
      return NumericRangeQuery.newIntRange(
          r.getField().getName(),
          toIndexTime(r.getMaxTimestamp()),
          null,
          true, true);
    }
    throw new QueryParseException("cannot negate: " + r);
  }

  /** Regex predicates become regexp queries; anything else a term query. */
  private static Query exactQuery(IndexPredicate<ChangeData> p) {
    if (p instanceof RegexPredicate<?>) {
      return regexQuery(p);
    } else {
      return new TermQuery(new Term(p.getField().getName(), p.getValue()));
    }
  }

  /**
   * Builds a regexp query after dropping a leading {@code ^} and a trailing
   * {@code $} that is not preceded by a backslash.
   */
  private static Query regexQuery(IndexPredicate<ChangeData> p) {
    String re = p.getValue();
    if (re.startsWith("^")) {
      re = re.substring(1);
    }
    if (re.endsWith("$") && !re.endsWith("\\$")) {
      re = re.substring(0, re.length() - 1);
    }
    return new RegexpQuery(new Term(p.getField().getName(), re));
  }

  /** Prefix match on the predicate's value. */
  private static Query prefixQuery(IndexPredicate<ChangeData> p) {
    return new PrefixQuery(new Term(p.getField().getName(), p.getValue()));
  }

  /** Full-text fields are searched with a fuzzy query on the value. */
  private static Query fullTextQuery(IndexPredicate<ChangeData> p) {
    return new FuzzyQuery(new Term(p.getField().getName(), p.getValue()));
  }

  /**
   * Converts a timestamp to the value stored in the index: milliseconds
   * since the epoch divided by 60000 (whole minutes), narrowed to
   * {@code int}.
   *
   * @param ts timestamp to convert.
   * @return the timestamp in minutes since the epoch.
   */
  public static int toIndexTime(Timestamp ts) {
    return (int) (ts.getTime() / 60000);
  }

  /**
   * Creates, but does not throw, the exception for an unsupported field type.
   *
   * @param t the unrecognized field type.
   * @return exception whose message names {@code t}.
   */
  public static IllegalArgumentException badFieldType(FieldType<?> t) {
    return new IllegalArgumentException("unknown index field type " + t);
  }

  private QueryBuilder() {
  }
}

View File

@@ -114,6 +114,10 @@ class SubIndex {
return new NrtFuture(writer.deleteDocuments(term));
}
/**
 * Deletes every document in this sub-index by delegating to the writer.
 *
 * @throws IOException if the writer fails.
 */
void deleteAll() throws IOException {
  this.writer.deleteAll();
}
/**
 * Obtains a searcher from the NRT manager.
 *
 * @return the searcher handed out by {@code nrtManager}.
 * @throws IOException if the manager cannot provide a searcher.
 */
IndexSearcher acquire() throws IOException {
  IndexSearcher searcher = nrtManager.acquire();
  return searcher;
}