Push open/closed subindex into Lucene implementation

Storing open/closed changes in separate indexes is an optimization
specific to the Lucene implementation. Other implementations may want
to do similar splitting, but the unit of splitting is not necessarily
a fully self-contained index.

Leave getPossibleStatus in IndexRewriteImpl, since it may be useful
for other implementations' internal optimizations as well.

Change-Id: Ib2b69f2eb0b1b9246390b9c6c8bc054a9e60836e
This commit is contained in:
Dave Borowitz
2013-06-07 12:02:28 -07:00
parent f2055007bc
commit 0066d9aa02
12 changed files with 287 additions and 317 deletions

View File

@@ -36,8 +36,8 @@ import java.util.Map;
public class IndexVersionCheck implements LifecycleListener {
public static final Map<String, Integer> SCHEMA_VERSIONS = ImmutableMap.of(
"changes_open", ChangeField.SCHEMA_VERSION,
"changes_closed", ChangeField.SCHEMA_VERSION);
LuceneChangeIndex.CHANGES_OPEN, ChangeField.SCHEMA_VERSION,
LuceneChangeIndex.CHANGES_CLOSED, ChangeField.SCHEMA_VERSION);
public static File gerritIndexConfig(SitePaths sitePaths) {
return new File(sitePaths.index_dir, "gerrit_index.config");

View File

@@ -15,13 +15,18 @@
package com.google.gerrit.lucene;
import static com.google.gerrit.server.query.change.ChangeQueryBuilder.FIELD_CHANGE;
import static com.google.gerrit.server.query.change.IndexRewriteImpl.CLOSED_STATUSES;
import static com.google.gerrit.server.query.change.IndexRewriteImpl.OPEN_STATUSES;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.gerrit.extensions.events.LifecycleListener;
import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.server.config.SitePaths;
import com.google.gerrit.server.index.ChangeField;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.FieldDef;
@@ -35,38 +40,33 @@ import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.gerrit.server.query.change.ChangeDataSource;
import com.google.gerrit.server.query.change.IndexRewriteImpl;
import com.google.gwtorm.server.OrmException;
import com.google.gwtorm.server.ResultSet;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
* Secondary index implementation using Apache Lucene.
@@ -76,81 +76,86 @@ import java.util.List;
* though there may be some lag between a committed write and it showing up to
* other threads' searchers.
*/
public class LuceneChangeIndex implements ChangeIndex {
private static final Logger log =
LoggerFactory.getLogger(LuceneChangeIndex.class);
@Singleton
public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
public static final Version LUCENE_VERSION = Version.LUCENE_43;
public static final String CHANGES_OPEN = "changes_open";
public static final String CHANGES_CLOSED = "changes_closed";
private final FillArgs fillArgs;
private final Directory dir;
private final IndexWriter writer;
private final SearcherManager searcherManager;
private final SubIndex openIndex;
private final SubIndex closedIndex;
LuceneChangeIndex(File file, FillArgs fillArgs) throws IOException {
@Inject
LuceneChangeIndex(SitePaths sitePaths, FillArgs fillArgs) throws IOException {
this.fillArgs = fillArgs;
dir = FSDirectory.open(file);
IndexWriterConfig writerConfig =
new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(LUCENE_VERSION));
writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
writer = new IndexWriter(dir, writerConfig);
searcherManager = new SearcherManager(writer, true, null);
openIndex = new SubIndex(new File(sitePaths.index_dir, CHANGES_OPEN));
closedIndex = new SubIndex(new File(sitePaths.index_dir, CHANGES_CLOSED));
}
void close() {
try {
searcherManager.close();
} catch (IOException e) {
log.warn("error closing Lucene searcher", e);
}
try {
writer.close(true);
} catch (IOException e) {
log.warn("error closing Lucene writer", e);
}
try {
dir.close();
} catch (IOException e) {
log.warn("error closing Lucene directory", e);
}
@Override
public void start() {
// Do nothing.
}
@Override
public void stop() {
openIndex.close();
closedIndex.close();
}
@Override
public void insert(ChangeData cd) throws IOException {
writer.addDocument(toDocument(cd));
commit();
Term id = idTerm(cd);
Document doc = toDocument(cd);
if (cd.getChange().getStatus().isOpen()) {
closedIndex.delete(id);
openIndex.insert(doc);
} else {
openIndex.delete(id);
closedIndex.insert(doc);
}
}
@Override
public void replace(ChangeData cd) throws IOException {
writer.updateDocument(intTerm(FIELD_CHANGE, cd.getId().get()),
toDocument(cd));
commit();
Term id = idTerm(cd);
Document doc = toDocument(cd);
if (cd.getChange().getStatus().isOpen()) {
closedIndex.delete(id);
openIndex.replace(id, doc);
} else {
openIndex.delete(id);
closedIndex.replace(id, doc);
}
}
@Override
public void delete(ChangeData cd) throws IOException {
writer.deleteDocuments(intTerm(FIELD_CHANGE, cd.getId().get()));
commit();
Term id = idTerm(cd);
if (cd.getChange().getStatus().isOpen()) {
openIndex.delete(id);
} else {
closedIndex.delete(id);
}
}
@Override
public ChangeDataSource getSource(Predicate<ChangeData> p)
throws QueryParseException {
return new QuerySource(toQuery(p));
Set<Change.Status> statuses = IndexRewriteImpl.getPossibleStatus(p);
List<SubIndex> indexes = Lists.newArrayListWithCapacity(2);
if (!Sets.intersection(statuses, OPEN_STATUSES).isEmpty()) {
indexes.add(openIndex);
}
if (!Sets.intersection(statuses, CLOSED_STATUSES).isEmpty()) {
indexes.add(closedIndex);
}
return new QuerySource(indexes, toQuery(p));
}
public Directory getDirectory() {
return dir;
}
public IndexWriter getWriter() {
return writer;
}
private void commit() throws IOException {
writer.commit();
searcherManager.maybeRefresh();
private Term idTerm(ChangeData cd) {
return intTerm(FIELD_CHANGE, cd.getId().get());
}
private Query toQuery(Predicate<ChangeData> p) throws QueryParseException {
@@ -214,9 +219,11 @@ public class LuceneChangeIndex implements ChangeIndex {
// TODO(dborowitz): Push limit down from predicate tree.
private static final int LIMIT = 1000;
private final List<SubIndex> indexes;
private final Query query;
public QuerySource(Query query) {
public QuerySource(List<SubIndex> indexes, Query query) {
this.indexes = indexes;
this.query = query;
}
@@ -233,36 +240,28 @@ public class LuceneChangeIndex implements ChangeIndex {
@Override
public ResultSet<ChangeData> read() throws OrmException {
try {
IndexSearcher searcher = searcherManager.acquire();
try {
ScoreDoc[] docs = searcher.search(query, LIMIT).scoreDocs;
List<ChangeData> result = Lists.newArrayListWithCapacity(docs.length);
for (ScoreDoc sd : docs) {
Document doc = searcher.doc(sd.doc);
Number v = doc.getField(FIELD_CHANGE).numericValue();
result.add(new ChangeData(new Change.Id(v.intValue())));
}
final List<ChangeData> r = Collections.unmodifiableList(result);
return new ResultSet<ChangeData>() {
@Override
public Iterator<ChangeData> iterator() {
return r.iterator();
}
@Override
public List<ChangeData> toList() {
return r;
}
@Override
public void close() {
// Do nothing.
}
};
} finally {
searcherManager.release(searcher);
List<ChangeData> result =
Lists.newArrayListWithExpectedSize(2 * getCardinality());
for (SubIndex index : indexes) {
result.addAll(index.search(query, LIMIT));
}
final List<ChangeData> r = Collections.unmodifiableList(result);
return new ResultSet<ChangeData>() {
@Override
public Iterator<ChangeData> iterator() {
return r.iterator();
}
@Override
public List<ChangeData> toList() {
return r;
}
@Override
public void close() {
// Do nothing.
}
};
} catch (IOException e) {
throw new OrmException(e);
}

View File

@@ -1,70 +0,0 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.lucene;
import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.gerrit.extensions.events.LifecycleListener;
import com.google.gerrit.server.config.SitePaths;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.FieldDef.FillArgs;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
/**
 * {@link ChangeIndex.Manager} backed by Lucene.
 *
 * <p>Indexes are opened on first lookup by name and kept in a cache; every
 * index opened so far is closed when {@link #stop()} is called.
 */
@Singleton
class LuceneChangeIndexManager implements ChangeIndex.Manager,
    LifecycleListener {
  /** Opened indexes keyed by name; an entry is created on first lookup. */
  private final LoadingCache<String, LuceneChangeIndex> indexes;

  /**
   * @param sitePaths site layout; each index lives in a subdirectory of
   *     {@code index_dir} named after the index.
   * @param fillArgs arguments handed to every index that gets opened.
   */
  @Inject
  LuceneChangeIndexManager(final SitePaths sitePaths, final FillArgs fillArgs) {
    CacheLoader<String, LuceneChangeIndex> opener =
        new CacheLoader<String, LuceneChangeIndex>() {
          @Override
          public LuceneChangeIndex load(String key) throws IOException {
            File indexDir = new File(sitePaths.index_dir, key);
            return new LuceneChangeIndex(indexDir, fillArgs);
          }
        };
    indexes = CacheBuilder.newBuilder().build(opener);
  }

  @Override
  public void start() {
    // Do nothing.
  }

  /** Closes every index that has been opened through {@link #get(String)}. */
  @Override
  public void stop() {
    for (LuceneChangeIndex opened : indexes.asMap().values()) {
      opened.close();
    }
  }

  /**
   * Returns the index with the given name, opening it if necessary.
   *
   * @param name index name, used as the directory name under the index dir.
   * @return the cached or newly opened index.
   * @throws IOException if opening the index failed; an {@code IOException}
   *     raised while loading is rethrown as-is, any other failure is wrapped.
   */
  @Override
  public LuceneChangeIndex get(String name) throws IOException {
    try {
      return indexes.get(name);
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      Throwables.propagateIfInstanceOf(cause, IOException.class);
      throw new IOException(e);
    }
  }
}

View File

@@ -34,8 +34,8 @@ public class LuceneIndexModule extends LifecycleModule {
@Override
protected void configure() {
install(new IndexModule(threads));
bind(ChangeIndex.Manager.class).to(LuceneChangeIndexManager.class);
listener().to(LuceneChangeIndexManager.class);
bind(ChangeIndex.class).to(LuceneChangeIndex.class);
listener().to(LuceneChangeIndex.class);
if (checkVersion) {
listener().to(IndexVersionCheck.class);
}

View File

@@ -0,0 +1,115 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.lucene;
import static com.google.gerrit.lucene.LuceneChangeIndex.LUCENE_VERSION;
import static com.google.gerrit.server.query.change.ChangeQueryBuilder.FIELD_CHANGE;
import com.google.common.collect.Lists;
import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.server.query.change.ChangeData;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
/**
 * Piece of the change index that is implemented as a separate Lucene index.
 *
 * <p>Each instance owns one on-disk directory together with the writer and
 * searcher manager opened on it. Every mutation is committed immediately and
 * followed by a searcher refresh request.
 */
class SubIndex {
  // Log under this class's own name so close() warnings are attributed to the
  // sub-index rather than to LuceneChangeIndex.
  private static final Logger log =
      LoggerFactory.getLogger(SubIndex.class);

  private final Directory dir;
  private final IndexWriter writer;
  private final SearcherManager searcherManager;

  /**
   * Opens the Lucene index stored in the given directory, creating it if it
   * does not exist yet.
   *
   * @param file directory holding the index files.
   * @throws IOException if the directory, writer or searcher cannot be opened.
   */
  SubIndex(File file) throws IOException {
    dir = FSDirectory.open(file);
    IndexWriterConfig writerConfig =
        new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writer = new IndexWriter(dir, writerConfig);
    searcherManager = new SearcherManager(writer, true, null);
  }

  /**
   * Closes the searcher manager, the writer and the directory, in that order.
   *
   * <p>A failure in one step is logged as a warning and does not prevent the
   * remaining steps from running.
   */
  void close() {
    try {
      searcherManager.close();
    } catch (IOException e) {
      log.warn("error closing Lucene searcher", e);
    }
    try {
      writer.close(true);
    } catch (IOException e) {
      log.warn("error closing Lucene writer", e);
    }
    try {
      dir.close();
    } catch (IOException e) {
      log.warn("error closing Lucene directory", e);
    }
  }

  /**
   * Adds a document to this index and commits.
   *
   * @param doc document to add.
   * @throws IOException if the write or the commit fails.
   */
  void insert(Document doc) throws IOException {
    writer.addDocument(doc);
    commit();
  }

  /**
   * Replaces the documents matching a term with the given document and commits.
   *
   * @param term term identifying the document(s) to replace.
   * @param doc replacement document.
   * @throws IOException if the write or the commit fails.
   */
  void replace(Term term, Document doc) throws IOException {
    writer.updateDocument(term, doc);
    commit();
  }

  /**
   * Deletes the documents matching a term and commits.
   *
   * @param term term identifying the document(s) to delete.
   * @throws IOException if the write or the commit fails.
   */
  void delete(Term term) throws IOException {
    writer.deleteDocuments(term);
    commit();
  }

  /**
   * Runs a query against this index.
   *
   * @param query Lucene query to execute.
   * @param limit maximum number of hits to request from the searcher.
   * @return unmodifiable list with one {@link ChangeData} per hit, built from
   *     the numeric value of the hit's {@code FIELD_CHANGE} field.
   * @throws IOException if acquiring the searcher or reading the index fails.
   */
  List<ChangeData> search(Query query, int limit) throws IOException {
    IndexSearcher searcher = searcherManager.acquire();
    try {
      ScoreDoc[] docs = searcher.search(query, limit).scoreDocs;
      List<ChangeData> result = Lists.newArrayListWithCapacity(docs.length);
      for (ScoreDoc sd : docs) {
        Document doc = searcher.doc(sd.doc);
        Number v = doc.getField(FIELD_CHANGE).numericValue();
        result.add(new ChangeData(new Change.Id(v.intValue())));
      }
      return Collections.unmodifiableList(result);
    } finally {
      // Always hand the searcher back, even when the search throws.
      searcherManager.release(searcher);
    }
  }

  /** Commits pending writes, then asks the searcher manager to refresh. */
  private void commit() throws IOException {
    writer.commit();
    searcherManager.maybeRefresh();
  }
}