Update Lucene to 5.0.0
Use this version starting at the existing schema version 15 (which has been in master for only a week or so).

Notable changes:
- Can use the new IndexWriter#setCommitOnClose(boolean) method to simplify closing an index.
- This means we no longer need to pass Version into the IndexWriterConfig constructor. According to [1], this was _only_ used to determine whether or not the index should be committed on close, as this behavior differed between versions. No more mapping schema versions to Lucene versions!
- IndexWriters are now forced to use their configured Analyzer, removing the methods taking an Analyzer (which we weren't using anyway). This saves some code in AutoCommitWriter.
- Lucene 5 cannot read indexes created by older versions without an additional jar in the classpath, so we need to add that.

The most annoying change is that sorting cannot be done on normal numeric fields by default anymore [2]. This was inefficient anyway, as Lucene had to seek and read all index field values before doing the sorting. Switch to the newer DocValues API for strongly-typed sortable fields. This introduces some medium-term ugliness as the sort spec changes depending on the schema version.

Unfortunately we can only use DocValues on new index versions; older versions need to use the new UninvertingReader API, which provides FieldCache-based sorting without a reindex. An overzealous check in a static method in Lucene [3] means we need to temporarily fork SearcherManager.java from Lucene in order to get this to work with the NRT machinery.

Since we have to jump through significant hoops to get older index versions readable by this version of Lucene, add a test specifically for schema v14.

[1] https://issues.apache.org/jira/browse/LUCENE-5871
[2] https://issues.apache.org/jira/browse/LUCENE-5666
[3] https://issues.apache.org/jira/browse/LUCENE-6370

Change-Id: I843be2fb697779fc741e25459a2716280b2bd0b6
This commit is contained in:
@@ -14,10 +14,12 @@
|
||||
|
||||
package com.google.gerrit.lucene;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static com.google.common.collect.Iterables.getOnlyElement;
|
||||
import static com.google.gerrit.server.git.QueueProvider.QueueType.INTERACTIVE;
|
||||
import static com.google.gerrit.server.index.IndexRewriteImpl.CLOSED_STATUSES;
|
||||
import static com.google.gerrit.server.index.IndexRewriteImpl.OPEN_STATUSES;
|
||||
|
||||
import static java.util.concurrent.TimeUnit.MILLISECONDS;
|
||||
import static java.util.concurrent.TimeUnit.MINUTES;
|
||||
|
||||
@@ -39,7 +41,6 @@ import com.google.gerrit.server.index.ChangeField;
|
||||
import com.google.gerrit.server.index.ChangeField.ChangeProtoField;
|
||||
import com.google.gerrit.server.index.ChangeField.PatchSetApprovalProtoField;
|
||||
import com.google.gerrit.server.index.ChangeIndex;
|
||||
import com.google.gerrit.server.index.ChangeSchemas;
|
||||
import com.google.gerrit.server.index.FieldDef;
|
||||
import com.google.gerrit.server.index.FieldDef.FillArgs;
|
||||
import com.google.gerrit.server.index.FieldType;
|
||||
@@ -64,9 +65,12 @@ import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.IntField;
|
||||
import org.apache.lucene.document.LongField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
@@ -76,13 +80,14 @@ import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.SearcherFactory;
|
||||
import org.apache.lucene.search.SearcherManager;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.uninverting.UninvertingReader;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.eclipse.jgit.errors.ConfigInvalidException;
|
||||
import org.eclipse.jgit.lib.Config;
|
||||
import org.eclipse.jgit.storage.file.FileBasedConfig;
|
||||
@@ -120,54 +125,19 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
private static final String CHANGE_FIELD = ChangeField.CHANGE.getName();
|
||||
private static final String DELETED_FIELD = ChangeField.DELETED.getName();
|
||||
private static final String ID_FIELD = ChangeField.LEGACY_ID.getName();
|
||||
private static final String ID_SORT_FIELD =
|
||||
sortFieldName(ChangeField.LEGACY_ID);
|
||||
private static final String MERGEABLE_FIELD = ChangeField.MERGEABLE.getName();
|
||||
private static final String UPDATED_SORT_FIELD =
|
||||
sortFieldName(ChangeField.UPDATED);
|
||||
|
||||
private static final ImmutableSet<String> FIELDS = ImmutableSet.of(
|
||||
ADDED_FIELD, APPROVAL_FIELD, CHANGE_FIELD, DELETED_FIELD, ID_FIELD,
|
||||
MERGEABLE_FIELD);
|
||||
|
||||
private static final Map<String, String> CUSTOM_CHAR_MAPPING = ImmutableMap.of(
|
||||
"_", " ", ".", " ");
|
||||
|
||||
private static final Map<Schema<ChangeData>, Version> LUCENE_VERSIONS;
|
||||
static {
|
||||
ImmutableMap.Builder<Schema<ChangeData>, Version> versions =
|
||||
ImmutableMap.builder();
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene43 = Version.LUCENE_43;
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene44 = Version.LUCENE_44;
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene46 = Version.LUCENE_46;
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene47 = Version.LUCENE_47;
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene48 = Version.LUCENE_48;
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene410 = Version.LUCENE_4_10_0;
|
||||
// We are using 4.10.2 but there is no difference in the index
|
||||
// format since 4.10.1, so we reuse the version here.
|
||||
@SuppressWarnings("deprecation")
|
||||
Version lucene4101 = Version.LUCENE_4_10_1;
|
||||
for (Map.Entry<Integer, Schema<ChangeData>> e
|
||||
: ChangeSchemas.ALL.entrySet()) {
|
||||
if (e.getKey() <= 3) {
|
||||
versions.put(e.getValue(), lucene43);
|
||||
} else if (e.getKey() <= 5) {
|
||||
versions.put(e.getValue(), lucene44);
|
||||
} else if (e.getKey() <= 8) {
|
||||
versions.put(e.getValue(), lucene46);
|
||||
} else if (e.getKey() <= 10) {
|
||||
versions.put(e.getValue(), lucene47);
|
||||
} else if (e.getKey() <= 11) {
|
||||
versions.put(e.getValue(), lucene48);
|
||||
} else if (e.getKey() <= 13) {
|
||||
versions.put(e.getValue(), lucene410);
|
||||
} else {
|
||||
versions.put(e.getValue(), lucene4101);
|
||||
}
|
||||
}
|
||||
LUCENE_VERSIONS = versions.build();
|
||||
}
|
||||
|
||||
public static void setReady(SitePaths sitePaths, int version, boolean ready)
|
||||
throws IOException {
|
||||
try {
|
||||
@@ -180,6 +150,10 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
}
|
||||
}
|
||||
|
||||
private static String sortFieldName(FieldDef<?, ?> f) {
|
||||
return f.getName() + "_SORT";
|
||||
}
|
||||
|
||||
static interface Factory {
|
||||
LuceneChangeIndex create(Schema<ChangeData> schema, String base);
|
||||
}
|
||||
@@ -188,12 +162,13 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
private final IndexWriterConfig luceneConfig;
|
||||
private long commitWithinMs;
|
||||
|
||||
private GerritIndexWriterConfig(Version version, Config cfg, String name) {
|
||||
private GerritIndexWriterConfig(Config cfg, String name) {
|
||||
CustomMappingAnalyzer analyzer =
|
||||
new CustomMappingAnalyzer(new StandardAnalyzer(
|
||||
CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING);
|
||||
luceneConfig = new IndexWriterConfig(version, analyzer);
|
||||
luceneConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
||||
luceneConfig = new IndexWriterConfig(analyzer)
|
||||
.setOpenMode(OpenMode.CREATE_OR_APPEND)
|
||||
.setCommitOnClose(true);
|
||||
double m = 1 << 20;
|
||||
luceneConfig.setRAMBufferSizeMB(cfg.getLong(
|
||||
"index", name, "ramBufferSize",
|
||||
@@ -229,6 +204,17 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
private final SubIndex openIndex;
|
||||
private final SubIndex closedIndex;
|
||||
|
||||
/**
|
||||
* Whether to use DocValues for range/sorted numeric fields.
|
||||
* <p>
|
||||
* Lucene 5 removed support for sorting based on normal numeric fields, so we
|
||||
* use the newer API for more strongly typed numeric fields in newer schema
|
||||
* versions. These fields also are not stored, so we need to store an auxiliary
|
||||
* stored-only field for them as well.
|
||||
*/
|
||||
// TODO(dborowitz): Delete when we delete support for pre-Lucene-5.0 schemas.
|
||||
private final boolean useDocValuesForSorting;
|
||||
|
||||
@AssistedInject
|
||||
LuceneChangeIndex(
|
||||
@GerritServerConfig Config cfg,
|
||||
@@ -245,10 +231,8 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
this.db = db;
|
||||
this.changeDataFactory = changeDataFactory;
|
||||
this.schema = schema;
|
||||
this.useDocValuesForSorting = schema.getVersion() >= 15;
|
||||
|
||||
Version luceneVersion = checkNotNull(
|
||||
LUCENE_VERSIONS.get(schema),
|
||||
"unknown Lucene version for index schema: %s", schema);
|
||||
CustomMappingAnalyzer analyzer =
|
||||
new CustomMappingAnalyzer(new StandardAnalyzer(CharArraySet.EMPTY_SET),
|
||||
CUSTOM_CHAR_MAPPING);
|
||||
@@ -258,21 +242,44 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
BooleanQuery.getMaxClauseCount()));
|
||||
|
||||
GerritIndexWriterConfig openConfig =
|
||||
new GerritIndexWriterConfig(luceneVersion, cfg, "changes_open");
|
||||
new GerritIndexWriterConfig(cfg, "changes_open");
|
||||
GerritIndexWriterConfig closedConfig =
|
||||
new GerritIndexWriterConfig(luceneVersion, cfg, "changes_closed");
|
||||
new GerritIndexWriterConfig(cfg, "changes_closed");
|
||||
|
||||
SearcherFactory searcherFactory = newSearcherFactory();
|
||||
if (cfg.getBoolean("index", "lucene", "testInmemory", false)) {
|
||||
openIndex = new SubIndex(new RAMDirectory(), "ramOpen", openConfig);
|
||||
closedIndex = new SubIndex(new RAMDirectory(), "ramClosed", closedConfig);
|
||||
openIndex = new SubIndex(new RAMDirectory(), "ramOpen", openConfig,
|
||||
searcherFactory);
|
||||
closedIndex = new SubIndex(new RAMDirectory(), "ramClosed", closedConfig,
|
||||
searcherFactory);
|
||||
} else {
|
||||
Path dir = base != null ? Paths.get(base)
|
||||
: LuceneVersionManager.getDir(sitePaths, schema);
|
||||
openIndex = new SubIndex(dir.resolve(CHANGES_OPEN), openConfig);
|
||||
closedIndex = new SubIndex(dir.resolve(CHANGES_CLOSED), closedConfig);
|
||||
openIndex = new SubIndex(dir.resolve(CHANGES_OPEN), openConfig,
|
||||
searcherFactory);
|
||||
closedIndex = new SubIndex(dir.resolve(CHANGES_CLOSED), closedConfig,
|
||||
searcherFactory);
|
||||
}
|
||||
}
|
||||
|
||||
private SearcherFactory newSearcherFactory() {
|
||||
if (useDocValuesForSorting) {
|
||||
return new SearcherFactory();
|
||||
}
|
||||
final Map<String, UninvertingReader.Type> mapping = ImmutableMap.of(
|
||||
ChangeField.LEGACY_ID.getName(), UninvertingReader.Type.INTEGER,
|
||||
ChangeField.UPDATED.getName(), UninvertingReader.Type.LONG);
|
||||
return new SearcherFactory() {
|
||||
@Override
|
||||
public IndexSearcher newSearcher(IndexReader reader) {
|
||||
checkState(reader instanceof DirectoryReader,
|
||||
"expected DirectoryReader, found %s", reader.getClass().getName());
|
||||
return new IndexSearcher(
|
||||
UninvertingReader.wrap((DirectoryReader) reader, mapping));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
List<ListenableFuture<?>> closeFutures = Lists.newArrayListWithCapacity(2);
|
||||
@@ -355,12 +362,18 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
setReady(sitePaths, schema.getVersion(), ready);
|
||||
}
|
||||
|
||||
private static Sort getSort() {
|
||||
return new Sort(
|
||||
new SortField(
|
||||
ChangeField.UPDATED.getName(), SortField.Type.LONG, true),
|
||||
new SortField(
|
||||
ChangeField.LEGACY_ID.getName(), SortField.Type.INT, true));
|
||||
private Sort getSort() {
|
||||
if (useDocValuesForSorting) {
|
||||
return new Sort(
|
||||
new SortField(UPDATED_SORT_FIELD, SortField.Type.LONG, true),
|
||||
new SortField(ID_SORT_FIELD, SortField.Type.LONG, true));
|
||||
} else {
|
||||
return new Sort(
|
||||
new SortField(
|
||||
ChangeField.UPDATED.getName(), SortField.Type.LONG, true),
|
||||
new SortField(
|
||||
ChangeField.LEGACY_ID.getName(), SortField.Type.INT, true));
|
||||
}
|
||||
}
|
||||
|
||||
private class QuerySource implements ChangeDataSource {
|
||||
@@ -506,6 +519,16 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
FieldType<?> type = values.getField().getType();
|
||||
Store store = store(values.getField());
|
||||
|
||||
if (useDocValuesForSorting) {
|
||||
if (values.getField() == ChangeField.LEGACY_ID) {
|
||||
int v = (Integer) getOnlyElement(values.getValues());
|
||||
doc.add(new NumericDocValuesField(ID_SORT_FIELD, v));
|
||||
} else if (values.getField() == ChangeField.UPDATED) {
|
||||
long t = ((Timestamp) getOnlyElement(values.getValues())).getTime();
|
||||
doc.add(new NumericDocValuesField(UPDATED_SORT_FIELD, t));
|
||||
}
|
||||
}
|
||||
|
||||
if (type == FieldType.INTEGER || type == FieldType.INTEGER_RANGE) {
|
||||
for (Object value : values.getValues()) {
|
||||
doc.add(new IntField(name, (Integer) value, store));
|
||||
|
Reference in New Issue
Block a user