Add maximum query size to IndexConfig

Keep track of the number of leaf index terms encountered during
IndexRewriteImpl and fail if it exceeds the value in IndexConfig. A
single count is shared across all query subtrees, under the assumption
that performance problems associated with too many terms will crop up
even if those terms are split across multiple queries.

Change-Id: Ib6e791b8834794ef5ffeacdd0935c2fe60dc8fbb
This commit is contained in:
Dave Borowitz 2015-10-15 11:20:30 -04:00
parent 8408244c21
commit d034ca8b14
5 changed files with 59 additions and 9 deletions

View File

@ -2363,6 +2363,15 @@ this threshold times the requested limit will result in an error. Set to
+
Defaults to no limit.
[[index.maxTerms]]index.maxTerms::
+
Maximum number of leaf terms to allow in a query. Too-large queries may
perform poorly, so setting this option causes query parsing to fail fast
before attempting to send them to the secondary index. Set to 0 for no
limit.
+
Defaults to 500.
==== Lucene configuration
Open and closed changes are indexed in separate indexes named

View File

@ -29,24 +29,27 @@ import org.eclipse.jgit.lib.Config;
*/
@AutoValue
public abstract class IndexConfig {
private static final int DEFAULT_MAX_TERMS = 500;
private static final int DEFAULT_MAX_PREFIX_TERMS = 100;
public static IndexConfig createDefault() {
return create(0, 0, DEFAULT_MAX_PREFIX_TERMS);
return create(0, 0, DEFAULT_MAX_TERMS, DEFAULT_MAX_PREFIX_TERMS);
}
public static IndexConfig fromConfig(Config cfg) {
return create(
cfg.getInt("index", null, "maxLimit", 0),
cfg.getInt("index", null, "maxPages", 0),
cfg.getInt("index", null, "maxTerms", 0),
cfg.getInt("index", null, "maxPrefixTerms", DEFAULT_MAX_PREFIX_TERMS));
}
public static IndexConfig create(int maxLimit, int maxPages,
int maxPrefixTerms) {
int maxTerms, int maxPrefixTerms) {
return new AutoValue_IndexConfig(
checkLimit(maxLimit, "maxLimit", Integer.MAX_VALUE),
checkLimit(maxPages, "maxPages", Integer.MAX_VALUE),
checkLimit(maxTerms, "maxTerms", Integer.MAX_VALUE),
checkLimit(maxPrefixTerms, "maxPrefixTerms", DEFAULT_MAX_PREFIX_TERMS));
}
@ -70,6 +73,12 @@ public abstract class IndexConfig {
*/
public abstract int maxPages();
/**
* @return maximum number of total index query terms supported by the
* underlying index, or limited for performance reasons.
*/
public abstract int maxTerms();
/**
* @return maximum number of prefix terms per query supported by the
* underlying index, or limited for performance reasons. Not enforced for

View File

@ -33,6 +33,8 @@ import com.google.gerrit.server.query.change.LimitPredicate;
import com.google.gerrit.server.query.change.OrSource;
import com.google.inject.Inject;
import org.eclipse.jgit.util.MutableInteger;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.List;
@ -117,10 +119,13 @@ public class IndexRewriteImpl implements ChangeQueryRewriter {
}
private final IndexCollection indexes;
private final IndexConfig config;
@Inject
IndexRewriteImpl(IndexCollection indexes) {
IndexRewriteImpl(IndexCollection indexes,
IndexConfig config) {
this.indexes = indexes;
this.config = config;
}
@Override
@ -132,7 +137,8 @@ public class IndexRewriteImpl implements ChangeQueryRewriter {
// skipped results would have been filtered out by the enclosing AndSource.
limit += start;
Predicate<ChangeData> out = rewriteImpl(in, index, limit);
MutableInteger leafTerms = new MutableInteger();
Predicate<ChangeData> out = rewriteImpl(in, index, limit, leafTerms);
if (in == out || out instanceof IndexPredicate) {
return new IndexedChangeQuery(index, out, limit);
} else if (out == null /* cannot rewrite */) {
@ -148,6 +154,7 @@ public class IndexRewriteImpl implements ChangeQueryRewriter {
* @param in predicate to rewrite.
* @param index index whose schema determines which fields are indexed.
* @param limit maximum number of results to return.
* @param leafTerms number of leaf index query terms encountered so far.
* @return {@code null} if no part of this subtree can be queried in the
* index directly. {@code in} if this subtree and all its children can be
* queried directly in the index. Otherwise, a predicate that is
@ -157,8 +164,12 @@ public class IndexRewriteImpl implements ChangeQueryRewriter {
* support this predicate.
*/
private Predicate<ChangeData> rewriteImpl(Predicate<ChangeData> in,
ChangeIndex index, int limit) throws QueryParseException {
ChangeIndex index, int limit, MutableInteger leafTerms)
throws QueryParseException {
if (isIndexPredicate(in, index)) {
if (++leafTerms.value > config.maxTerms()) {
throw new QueryParseException("too many terms in query");
}
return in;
} else if (in instanceof LimitPredicate) {
// Replace any limits with the limit provided by the caller.
@ -174,7 +185,7 @@ public class IndexRewriteImpl implements ChangeQueryRewriter {
List<Predicate<ChangeData>> newChildren = Lists.newArrayListWithCapacity(n);
for (int i = 0; i < n; i++) {
Predicate<ChangeData> c = in.getChild(i);
Predicate<ChangeData> nc = rewriteImpl(c, index, limit);
Predicate<ChangeData> nc = rewriteImpl(c, index, limit, leafTerms);
if (nc == c) {
isIndexed.set(i);
newChildren.add(c);

View File

@ -133,8 +133,8 @@ public class InternalChangeQuery {
Schema<ChangeData> schema = schema(indexes);
int batchSize;
if (schema != null && schema.hasField(ChangeField.EXACT_COMMIT)) {
// TODO(dborowitz): Move to IndexConfig and use in more places.
batchSize = 500;
// Account for all commit predicates plus ref, project, status.
batchSize = indexConfig.maxTerms() - 3;
} else {
batchSize = indexConfig.maxPrefixTerms();
}
@ -146,6 +146,10 @@ public class InternalChangeQuery {
Branch.NameKey branch, List<String> hashes, int batchSize)
throws OrmException {
List<Predicate<ChangeData>> commits = commits(schema, hashes);
if (batchSize >= hashes.size()) {
return query(commitsOnBranchNotMerged(branch, commits));
}
int numBatches = (hashes.size() / batchSize) + 1;
List<Predicate<ChangeData>> queries = new ArrayList<>(numBatches);
for (List<Predicate<ChangeData>> batch

View File

@ -34,13 +34,18 @@ import com.google.gerrit.server.query.change.ChangeQueryBuilder;
import com.google.gerrit.server.query.change.OrSource;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Set;
public class IndexRewriteTest {
@Rule
public ExpectedException exception = ExpectedException.none();
private FakeIndex index;
private IndexCollection indexes;
private ChangeQueryBuilder queryBuilder;
@ -52,7 +57,8 @@ public class IndexRewriteTest {
indexes = new IndexCollection();
indexes.setSearchIndex(index);
queryBuilder = new FakeQueryBuilder(indexes);
rewrite = new IndexRewriteImpl(indexes);
rewrite = new IndexRewriteImpl(indexes,
IndexConfig.create(0, 0, 3, 100));
}
@Test
@ -209,6 +215,17 @@ public class IndexRewriteTest {
out.getChildren());
}
@Test
public void testTooManyTerms() throws Exception {
String q = "file:a OR file:b OR file:c";
Predicate<ChangeData> in = parse(q);
assertEquals(query(in), rewrite(in));
exception.expect(QueryParseException.class);
exception.expectMessage("too many terms in query");
rewrite(parse(q + " OR file:d"));
}
private Predicate<ChangeData> parse(String query) throws QueryParseException {
return queryBuilder.parse(query);
}