Merge results from multiple Lucene subindexes

Instead of concatenating each subindex's results into a union, merge
the per-subindex hits with TopDocs.merge.  This allows a future change
to support a sorting option, and Lucene's native merge will perform a
merge-sort that selects the top N documents, regardless of which
subindex each document came from.

Change-Id: Iab731bb5979156a6bafeace1cd73d7f24a541227
This commit is contained in:
Shawn Pearce
2013-06-24 11:41:16 -06:00
parent 36abbd88f3
commit 4f77a27de9
2 changed files with 39 additions and 28 deletions

View File

@@ -17,7 +17,6 @@ package com.google.gerrit.lucene;
import static com.google.gerrit.server.query.change.ChangeQueryBuilder.FIELD_CHANGE;
import static com.google.gerrit.server.query.change.IndexRewriteImpl.CLOSED_STATUSES;
import static com.google.gerrit.server.query.change.IndexRewriteImpl.OPEN_STATUSES;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
@@ -54,12 +53,17 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
@@ -78,6 +82,9 @@ import java.util.Set;
*/
@Singleton
public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
private static final Logger log =
LoggerFactory.getLogger(LuceneChangeIndex.class);
public static final Version LUCENE_VERSION = Version.LUCENE_43;
public static final String CHANGES_OPEN = "changes_open";
public static final String CHANGES_CLOSED = "changes_closed";
@@ -239,12 +246,23 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
@Override
public ResultSet<ChangeData> read() throws OrmException {
IndexSearcher[] searchers = new IndexSearcher[indexes.size()];
try {
List<ChangeData> result =
Lists.newArrayListWithExpectedSize(2 * getCardinality());
for (SubIndex index : indexes) {
result.addAll(index.search(query, LIMIT));
TopDocs[] hits = new TopDocs[indexes.size()];
for (int i = 0; i < indexes.size(); i++) {
searchers[i] = indexes.get(i).acquire();
hits[i] = searchers[i].search(query, LIMIT);
}
TopDocs docs = TopDocs.merge(null, LIMIT, hits);
List<ChangeData> result =
Lists.newArrayListWithCapacity(docs.scoreDocs.length);
for (ScoreDoc sd : docs.scoreDocs) {
Document doc = searchers[sd.shardIndex].doc(sd.doc);
Number v = doc.getField(FIELD_CHANGE).numericValue();
result.add(new ChangeData(new Change.Id(v.intValue())));
}
final List<ChangeData> r = Collections.unmodifiableList(result);
return new ResultSet<ChangeData>() {
@Override
@@ -264,6 +282,16 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
};
} catch (IOException e) {
throw new OrmException(e);
} finally {
for (int i = 0; i < indexes.size(); i++) {
if (searchers[i] != null) {
try {
indexes.get(i).release(searchers[i]);
} catch (IOException e) {
log.warn("cannot release Lucene searcher", e);
}
}
}
}
}
}

View File

@@ -15,11 +15,6 @@
package com.google.gerrit.lucene;
import static com.google.gerrit.lucene.LuceneChangeIndex.LUCENE_VERSION;
import static com.google.gerrit.server.query.change.ChangeQueryBuilder.FIELD_CHANGE;
import com.google.common.collect.Lists;
import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.server.query.change.ChangeData;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
@@ -28,8 +23,6 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -38,8 +31,6 @@ import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
/** Piece of the change index that is implemented as a separate Lucene index. */
class SubIndex {
@@ -92,20 +83,12 @@ class SubIndex {
commit();
}
List<ChangeData> search(Query query, int limit) throws IOException {
IndexSearcher searcher = searcherManager.acquire();
try {
ScoreDoc[] docs = searcher.search(query, limit).scoreDocs;
List<ChangeData> result = Lists.newArrayListWithCapacity(docs.length);
for (ScoreDoc sd : docs) {
Document doc = searcher.doc(sd.doc);
Number v = doc.getField(FIELD_CHANGE).numericValue();
result.add(new ChangeData(new Change.Id(v.intValue())));
}
return Collections.unmodifiableList(result);
} finally {
searcherManager.release(searcher);
}
/**
 * Obtains a searcher for this subindex by delegating to
 * {@code searcherManager.acquire()}.
 *
 * <p>The caller in {@code LuceneChangeIndex} hands every acquired searcher
 * back through {@link #release(IndexSearcher)} in a {@code finally} block;
 * new callers should presumably do the same.
 *
 * @return the searcher returned by the underlying searcher manager.
 * @throws IOException if the searcher manager fails to provide a searcher.
 */
IndexSearcher acquire() throws IOException {
return searcherManager.acquire();
}
/**
 * Hands a searcher back to this subindex by delegating to
 * {@code searcherManager.release(searcher)}.
 *
 * @param searcher the searcher to give back; presumably one previously
 *     obtained from {@link #acquire()} on this same subindex.
 * @throws IOException if the searcher manager fails while releasing.
 */
void release(IndexSearcher searcher) throws IOException {
searcherManager.release(searcher);
}
private void commit() throws IOException {