Teach secondary index to handle boolean queries

Modify ChangeIndex.getSource() to allow And/Or/NotPredicates in
addition to IndexPredicates, and convert these to the appropriate
index-specific query types.

Add a preliminary rewrite step before the manual rewrite steps in
ChangeQueryRewriter. This step attempts to find maximal subtrees that
contain only boolean operations on IndexPredicates, i.e. exactly those
subtrees that can now be queried in the index.

The rewrite step is smart enough to partition children of a single
boolean node into index/non-index predicates, but not smart enough to
rearrange the tree structure beyond that. For each IndexPredicate
type, determine whether it is only satisfiable by looking at the
index; when partitioning children, a non-index-only predicates may be
duplicated to remain as a hint to the database-based rewrite step.

Change-Id: Ifc9e94f87388764781fdc3f50aba0286829c33c9
This commit is contained in:
Dave Borowitz
2013-05-23 17:01:34 -07:00
parent 60ac9201ca
commit 3982b70285
12 changed files with 480 additions and 24 deletions

View File

@@ -16,6 +16,10 @@ package com.google.gerrit.lucene;
import static com.google.gerrit.server.query.change.ChangeQueryBuilder.FIELD_CHANGE;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
import com.google.common.collect.Lists;
import com.google.gerrit.extensions.events.LifecycleListener;
import com.google.gerrit.reviewdb.client.Change;
@@ -26,6 +30,10 @@ import com.google.gerrit.server.index.FieldDef;
import com.google.gerrit.server.index.FieldDef.FillArgs;
import com.google.gerrit.server.index.FieldType;
import com.google.gerrit.server.index.IndexPredicate;
import com.google.gerrit.server.query.AndPredicate;
import com.google.gerrit.server.query.NotPredicate;
import com.google.gerrit.server.query.OrPredicate;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.gerrit.server.query.change.ChangeDataSource;
@@ -43,6 +51,8 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@@ -132,15 +142,9 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
}
@Override
public ChangeDataSource getSource(IndexPredicate<ChangeData> p)
public ChangeDataSource getSource(Predicate<ChangeData> p)
throws QueryParseException {
if (p.getType() == FieldType.INTEGER) {
return intQuery(p);
} else if (p.getType() == FieldType.EXACT) {
return exactQuery(p);
} else {
throw badFieldType(p.getType());
}
return new QuerySource(toQuery(p));
}
public Directory getDirectory() {
@@ -156,13 +160,47 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
searcherManager.maybeRefresh();
}
private Query toQuery(Predicate<ChangeData> p) throws QueryParseException {
if (p.getClass() == AndPredicate.class) {
return booleanQuery(p, MUST);
} else if (p.getClass() == OrPredicate.class) {
return booleanQuery(p, SHOULD);
} else if (p.getClass() == NotPredicate.class) {
return booleanQuery(p, MUST_NOT);
} else if (p instanceof IndexPredicate) {
return fieldQuery((IndexPredicate<ChangeData>) p);
} else {
throw new QueryParseException("Cannot convert to index predicate: " + p);
}
}
private Query booleanQuery(Predicate<ChangeData> p, BooleanClause.Occur o)
throws QueryParseException {
BooleanQuery q = new BooleanQuery();
for (int i = 0; i < p.getChildCount(); i++) {
q.add(toQuery(p.getChild(i)), o);
}
return q;
}
private Query fieldQuery(IndexPredicate<ChangeData> p)
throws QueryParseException {
if (p.getType() == FieldType.INTEGER) {
return intQuery(p);
} else if (p.getType() == FieldType.EXACT) {
return exactQuery(p);
} else {
throw badFieldType(p.getType());
}
}
private Term intTerm(String name, int value) {
BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
NumericUtils.intToPrefixCodedBytes(value, 0, bytes);
return new Term(name, bytes);
}
private QuerySource intQuery(IndexPredicate<ChangeData> p)
private Query intQuery(IndexPredicate<ChangeData> p)
throws QueryParseException {
int value;
try {
@@ -172,12 +210,11 @@ public class LuceneChangeIndex implements ChangeIndex, LifecycleListener {
} catch (IllegalArgumentException e) {
throw new QueryParseException("not an integer: " + p.getValue());
}
return new QuerySource(new TermQuery(intTerm(p.getOperator(), value)));
return new TermQuery(intTerm(p.getOperator(), value));
}
private QuerySource exactQuery(IndexPredicate<ChangeData> p) {
return new QuerySource(new TermQuery(
new Term(p.getOperator(), p.getValue())));
private Query exactQuery(IndexPredicate<ChangeData> p) {
return new TermQuery(new Term(p.getOperator(), p.getValue()));
}
private class QuerySource implements ChangeDataSource {

View File

@@ -19,6 +19,8 @@ import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.ChangeIndexer;
import com.google.gerrit.server.index.ChangeIndexerImpl;
import com.google.gerrit.server.query.change.IndexRewrite;
import com.google.gerrit.server.query.change.IndexRewriteImpl;
import com.google.inject.Injector;
import com.google.inject.Key;
@@ -44,6 +46,7 @@ public class LuceneIndexModule extends LifecycleModule {
protected void configure() {
bind(ChangeIndex.class).to(LuceneChangeIndex.class);
bind(ChangeIndexer.class).to(ChangeIndexerImpl.class);
bind(IndexRewrite.class).to(IndexRewriteImpl.class);
listener().to(LuceneChangeIndex.class);
if (checkVersion) {
listener().to(IndexVersionCheck.class);

View File

@@ -14,6 +14,7 @@
package com.google.gerrit.server.index;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.gerrit.server.query.change.ChangeDataSource;
@@ -44,7 +45,7 @@ public interface ChangeIndex {
}
@Override
public ChangeDataSource getSource(IndexPredicate<ChangeData> p)
public ChangeDataSource getSource(Predicate<ChangeData> p)
throws QueryParseException {
throw new UnsupportedOperationException();
}
@@ -81,12 +82,14 @@ public interface ChangeIndex {
* Convert the given operator predicate into a source searching the index and
* returning only the documents matching that predicate.
*
* @param p the predicate to match.
* @param p the predicate to match. Must be a tree containing only AND, OR,
* or NOT predicates as internal nodes, and {@link IndexPredicate}s as
* leaves.
* @return a source of documents matching the predicate.
*
* @throws QueryParseException if the predicate could not be converted to an
* indexed data source.
*/
public ChangeDataSource getSource(IndexPredicate<ChangeData> p)
public ChangeDataSource getSource(Predicate<ChangeData> p)
throws QueryParseException;
}

View File

@@ -28,4 +28,12 @@ public abstract class IndexPredicate<I> extends OperatorPredicate<I> {
public FieldType<?> getType() {
return def.getType();
}
/**
* @return whether this predicate can only be satisfied by looking at the
* secondary index, i.e. it cannot be expressed as a query over the DB.
*/
public boolean isIndexOnly() {
return false;
}
}

View File

@@ -14,6 +14,7 @@
package com.google.gerrit.server.index;
import com.google.gerrit.server.query.change.IndexRewrite;
import com.google.inject.AbstractModule;
public class NoIndexModule extends AbstractModule {
@@ -25,5 +26,6 @@ public class NoIndexModule extends AbstractModule {
protected void configure() {
bind(ChangeIndex.class).toInstance(ChangeIndex.DISABLED);
bind(ChangeIndexer.class).toInstance(ChangeIndexer.DISABLED);
bind(IndexRewrite.class).toInstance(IndexRewrite.DISABLED);
}
}

View File

@@ -33,10 +33,10 @@ import java.util.Collection;
*/
public class PredicateWrapper extends Predicate<ChangeData> implements
ChangeDataSource {
private final IndexPredicate<ChangeData> pred;
private final Predicate<ChangeData> pred;
private final ChangeDataSource source;
public PredicateWrapper(ChangeIndex index, IndexPredicate<ChangeData> pred)
public PredicateWrapper(ChangeIndex index, Predicate<ChangeData> pred)
throws QueryParseException {
this.pred = pred;
this.source = index.getSource(pred);
@@ -84,4 +84,9 @@ public class PredicateWrapper extends Predicate<ChangeData> implements
&& getClass() == other.getClass()
&& pred.equals(((PredicateWrapper) other).pred);
}
@Override
public String toString() {
return "index(" + pred + ")";
}
}

View File

@@ -31,7 +31,6 @@ import com.google.gerrit.server.account.GroupBackends;
import com.google.gerrit.server.config.AllProjectsName;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.PredicateWrapper;
import com.google.gerrit.server.patch.PatchListCache;
import com.google.gerrit.server.project.ChangeControl;
import com.google.gerrit.server.project.ProjectCache;
@@ -297,10 +296,7 @@ public class ChangeQueryBuilder extends QueryBuilder<ChangeData> {
}
} else {
if (!file.startsWith("^") && args.index != ChangeIndex.DISABLED) {
// TODO(dborowitz): Wrap predicates in query rewriter, not here.
return new PredicateWrapper(
args.index,
new EqualsFilePredicate(args.dbProvider, args.patchListCache, file));
return new EqualsFilePredicate(args.dbProvider, args.patchListCache, file);
} else {
throw error("regular expression not permitted here: file:" + file);
}

View File

@@ -43,11 +43,14 @@ public class ChangeQueryRewriter extends QueryRewriter<ChangeData> {
null, null, null, null, null), null));
private final Provider<ReviewDb> dbProvider;
private final IndexRewrite indexRewrite;
@Inject
ChangeQueryRewriter(Provider<ReviewDb> dbProvider) {
ChangeQueryRewriter(Provider<ReviewDb> dbProvider,
IndexRewrite indexRewrite) {
super(mydef);
this.dbProvider = dbProvider;
this.indexRewrite = indexRewrite;
}
@Override
@@ -60,6 +63,11 @@ public class ChangeQueryRewriter extends QueryRewriter<ChangeData> {
return hasSource(l) ? new OrSource(l) : super.or(l);
}
@Override
public Predicate<ChangeData> rewrite(Predicate<ChangeData> in) {
return super.rewrite(indexRewrite.rewrite(in));
}
@Rewrite("-status:open")
@NoCostComputation
public Predicate<ChangeData> r00_notOpen() {

View File

@@ -54,4 +54,9 @@ class EqualsFilePredicate extends IndexPredicate<ChangeData> {
public int getCost() {
return 1;
}
@Override
public boolean isIndexOnly() {
return true;
}
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.package com.google.gerrit.server.git;
package com.google.gerrit.server.query.change;
import com.google.gerrit.server.query.Predicate;
public interface IndexRewrite {
/** Instance indicating secondary index is disabled. */
public static final IndexRewrite DISABLED = new IndexRewrite() {
@Override
public Predicate<ChangeData> rewrite(Predicate<ChangeData> in) {
return in;
}
};
/**
* Rewrite a predicate to push as much boolean logic as possible into the
* secondary index query system.
*
* @param in predicate to rewrite.
* @return a predicate with some subtrees replaced with predicates that are
* also sources that query the index directly.
*/
public Predicate<ChangeData> rewrite(Predicate<ChangeData> in);
}

View File

@@ -0,0 +1,171 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.package com.google.gerrit.server.git;
package com.google.gerrit.server.query.change;
import com.google.common.collect.Lists;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.IndexPredicate;
import com.google.gerrit.server.index.PredicateWrapper;
import com.google.gerrit.server.query.AndPredicate;
import com.google.gerrit.server.query.NotPredicate;
import com.google.gerrit.server.query.OrPredicate;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.inject.Inject;
import java.util.BitSet;
import java.util.List;
/** Rewriter that pushes boolean logic into the secondary index. */
public class IndexRewriteImpl implements IndexRewrite {
private final ChangeIndex index;
@Inject
IndexRewriteImpl(ChangeIndex index) {
this.index = index;
}
@Override
public Predicate<ChangeData> rewrite(Predicate<ChangeData> in) {
Predicate<ChangeData> out = rewriteImpl(in);
if (out == null) {
return in;
} else if (out == in) {
return wrap(out);
} else {
return out;
}
}
/**
* Rewrite a single predicate subtree.
*
* @param in predicate to rewrite.
* @return {@code null} if no part of this subtree can be queried in the
* index directly. {@code in} if this subtree and all its children can be
* queried directly in the index. Otherwise, a predicate that is
* semantically equivalent, with some of its subtrees wrapped to query the
* index directly.
*/
private Predicate<ChangeData> rewriteImpl(Predicate<ChangeData> in) {
if (in instanceof IndexPredicate) {
return in;
}
if (!isRewritePossible(in)) {
return null;
}
int n = in.getChildCount();
BitSet toKeep = new BitSet(n);
BitSet toWrap = new BitSet(n);
BitSet rewritten = new BitSet(n);
List<Predicate<ChangeData>> newChildren = Lists.newArrayListWithCapacity(n);
for (int i = 0; i < n; i++) {
Predicate<ChangeData> c = in.getChild(i);
Predicate<ChangeData> nc = rewriteImpl(c);
if (nc == null) {
toKeep.set(i);
newChildren.add(c);
} else if (nc == c) {
toWrap.set(i);
newChildren.add(nc);
} else {
rewritten.set(i);
newChildren.add(nc);
}
}
if (toKeep.cardinality() == n) {
return null; // Can't rewrite any children.
}
if (rewritten.cardinality() == n) {
// All children were partially, but not fully, rewritten.
return in.copy(newChildren);
}
if (toWrap.cardinality() == n) {
// All children can be fully rewritten, push work to parent.
return in;
}
return partitionChildren(in, newChildren, toWrap);
}
private Predicate<ChangeData> partitionChildren(Predicate<ChangeData> in,
List<Predicate<ChangeData>> newChildren, BitSet toWrap) {
if (toWrap.cardinality() == 1) {
int i = toWrap.nextSetBit(0);
newChildren.set(i, wrap(newChildren.get(i)));
return in.copy(newChildren);
}
// Group all toWrap predicates into a wrapped subtree and place it as a
// sibling of the non-/partially-wrapped predicates. Assumes partitioning
// the children into arbitrary subtrees of the same type is logically
// equivalent to having them as siblings.
List<Predicate<ChangeData>> wrapped = Lists.newArrayListWithCapacity(
toWrap.cardinality());
List<Predicate<ChangeData>> all = Lists.newArrayListWithCapacity(
newChildren.size() - toWrap.cardinality() + 1);
for (int i = 0; i < newChildren.size(); i++) {
Predicate<ChangeData> child = newChildren.get(i);
if (toWrap.get(i)) {
wrapped.add(child);
if (allNonIndexOnly(child)) {
// Duplicate non-index-only predicate subtrees alongside the wrapped
// subtrees so they can provide index hints to the DB-based rewriter.
all.add(child);
}
} else {
all.add(child);
}
}
all.add(wrap(in.copy(wrapped)));
return in.copy(all);
}
private static boolean allNonIndexOnly(Predicate<ChangeData> p) {
if (p instanceof IndexPredicate) {
return !((IndexPredicate<ChangeData>) p).isIndexOnly();
}
if (p instanceof AndPredicate
|| p instanceof OrPredicate
|| p instanceof NotPredicate) {
for (int i = 0; i < p.getChildCount(); i++) {
if (!allNonIndexOnly(p.getChild(i))) {
return false;
}
}
return true;
} else {
return true;
}
}
private PredicateWrapper wrap(Predicate<ChangeData> p) {
try {
return new PredicateWrapper(index, p);
} catch (QueryParseException e) {
throw new IllegalStateException(
"Failed to convert " + p + " to index predicate", e);
}
}
private static boolean isRewritePossible(Predicate<ChangeData> p) {
if (p.getClass() != AndPredicate.class
&& p.getClass() != OrPredicate.class
&& p.getClass() != NotPredicate.class) {
return false;
}
return p.getChildCount() > 0;
}
}

View File

@@ -0,0 +1,181 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.package com.google.gerrit.server.git;
package com.google.gerrit.server.query.change;
import com.google.common.collect.ImmutableList;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.PredicateWrapper;
import com.google.gerrit.server.query.AndPredicate;
import com.google.gerrit.server.query.OrPredicate;
import com.google.gerrit.server.query.Predicate;
import com.google.gerrit.server.query.QueryParseException;
import com.google.gwtorm.server.OrmException;
import com.google.gwtorm.server.ResultSet;
import junit.framework.TestCase;
import java.io.IOException;
@SuppressWarnings("unchecked")
public class IndexRewriteTest extends TestCase {
private static class DummyIndex implements ChangeIndex {
@Override
public void insert(ChangeData cd) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void replace(ChangeData cd) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public ChangeDataSource getSource(Predicate<ChangeData> p)
throws QueryParseException {
return new Source();
}
}
private static class Source implements ChangeDataSource {
@Override
public int getCardinality() {
throw new UnsupportedOperationException();
}
@Override
public boolean hasChange() {
throw new UnsupportedOperationException();
}
@Override
public ResultSet<ChangeData> read() throws OrmException {
throw new UnsupportedOperationException();
}
}
private DummyIndex index;
private ChangeQueryBuilder queryBuilder;
private IndexRewrite rewrite;
@Override
public void setUp() throws Exception {
super.setUp();
index = new DummyIndex();
queryBuilder = new ChangeQueryBuilder(
new ChangeQueryBuilder.Arguments(null, null, null, null, null, null,
null, null, null, null, null, null),
null);
rewrite = new IndexRewriteImpl(index);
}
public void testIndexPredicate() throws Exception {
Predicate<ChangeData> in = parse("file:a");
assertEquals(wrap(in), rewrite(in));
}
public void testNonIndexPredicate() throws Exception {
Predicate<ChangeData> in = parse("branch:a");
assertSame(in, rewrite(in));
}
public void testIndexPredicates() throws Exception {
Predicate<ChangeData> in = parse("file:a file:b");
assertEquals(wrap(in), rewrite(in));
}
public void testNonIndexPredicates() throws Exception {
Predicate<ChangeData> in = parse("branch:a OR branch:b");
assertSame(in, rewrite(in));
}
public void testOneIndexPredicate() throws Exception {
Predicate<ChangeData> in = parse("branch:a file:b");
Predicate<ChangeData> out = rewrite(in);
assertSame(AndPredicate.class, out.getClass());
assertEquals(ImmutableList.of(in.getChild(0), wrap(in.getChild(1))),
out.getChildren());
}
public void testThreeLevelTreeWithAllIndexPredicates() throws Exception {
Predicate<ChangeData> in =
parse("-status:abandoned (status:open OR status:merged)");
assertEquals(wrap(in), rewrite.rewrite(in));
}
public void testThreeLevelTreeWithSomeIndexPredicates() throws Exception {
Predicate<ChangeData> in = parse("-branch:a (file:b OR file:c)");
Predicate<ChangeData> out = rewrite(in);
assertEquals(AndPredicate.class, out.getClass());
assertEquals(ImmutableList.of(in.getChild(0), wrap(in.getChild(1))),
out.getChildren());
}
public void testMultipleIndexPredicates() throws Exception {
Predicate<ChangeData> in =
parse("file:a OR branch:b OR file:c OR branch:d");
Predicate<ChangeData> out = rewrite(in);
assertSame(OrPredicate.class, out.getClass());
assertEquals(ImmutableList.of(
in.getChild(1), in.getChild(3),
wrap(Predicate.or(in.getChild(0), in.getChild(2)))),
out.getChildren());
}
public void testDuplicateSimpleNonIndexOnlyPredicates() throws Exception {
Predicate<ChangeData> in = parse("status:new project:p file:a");
Predicate<ChangeData> out = rewrite(in);
assertSame(AndPredicate.class, out.getClass());
assertEquals(ImmutableList.of(
in.getChild(0), in.getChild(1),
wrap(Predicate.and(in.getChild(0), in.getChild(2)))),
out.getChildren());
}
public void testDuplicateCompoundNonIndexOnlyPredicates() throws Exception {
Predicate<ChangeData> in =
parse("(status:new OR status:draft) project:p file:a");
Predicate<ChangeData> out = rewrite(in);
assertSame(AndPredicate.class, out.getClass());
assertEquals(ImmutableList.of(
in.getChild(0), in.getChild(1),
wrap(Predicate.and(in.getChild(0), in.getChild(2)))),
out.getChildren());
}
public void testDuplicateCompoundIndexOnlyPredicates() throws Exception {
Predicate<ChangeData> in =
parse("(status:new OR file:a) project:p file:b");
Predicate<ChangeData> out = rewrite(in);
assertSame(AndPredicate.class, out.getClass());
assertEquals(ImmutableList.of(
in.getChild(1),
wrap(Predicate.and(in.getChild(0), in.getChild(2)))),
out.getChildren());
}
private Predicate<ChangeData> parse(String query) throws QueryParseException {
return queryBuilder.parse(query);
}
private Predicate<ChangeData> rewrite(Predicate<ChangeData> in) {
return rewrite.rewrite(in);
}
private PredicateWrapper wrap(Predicate<ChangeData> p)
throws QueryParseException {
return new PredicateWrapper(index, p);
}
}