Factor out a class for efficient regex search over lists
RegexPathPredicate had some clever logic for avoiding matching regular expressions over an entire search list when we can identify a common prefix that all matching results must have. Factor this out into a new class, RegexListSearcher, add tests, and use it from ListProjects.

Change-Id: Ie08ad8dcf09708a8aa3efcfbed6c4ee6879f80c7
This commit is contained in:
@@ -16,6 +16,7 @@ package com.google.gerrit.server.project;
|
||||
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
@@ -38,14 +39,12 @@ import com.google.gerrit.server.WebLinks;
|
||||
import com.google.gerrit.server.account.GroupCache;
|
||||
import com.google.gerrit.server.account.GroupControl;
|
||||
import com.google.gerrit.server.git.GitRepositoryManager;
|
||||
import com.google.gerrit.server.util.RegexListSearcher;
|
||||
import com.google.gerrit.server.util.TreeFormatter;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Provider;
|
||||
|
||||
import dk.brics.automaton.RegExp;
|
||||
import dk.brics.automaton.RunAutomaton;
|
||||
|
||||
import org.eclipse.jgit.errors.RepositoryNotFoundException;
|
||||
import org.eclipse.jgit.lib.Constants;
|
||||
import org.eclipse.jgit.lib.Ref;
|
||||
@@ -462,27 +461,18 @@ public class ListProjects implements RestReadView<TopLevelResource> {
|
||||
});
|
||||
} else if (matchRegex != null) {
|
||||
checkMatchOptions(matchPrefix == null && matchSubstring == null);
|
||||
if (matchRegex.startsWith("^")) {
|
||||
matchRegex = matchRegex.substring(1);
|
||||
}
|
||||
if (matchRegex.endsWith("$") && !matchRegex.endsWith("\\$")) {
|
||||
matchRegex = matchRegex.substring(0, matchRegex.length() - 1);
|
||||
}
|
||||
if (matchRegex.equals(".*")) {
|
||||
return projectCache.all();
|
||||
}
|
||||
RegexListSearcher<Project.NameKey> searcher;
|
||||
try {
|
||||
final RunAutomaton a =
|
||||
new RunAutomaton(new RegExp(matchRegex).toAutomaton());
|
||||
return Iterables.filter(projectCache.all(),
|
||||
new Predicate<Project.NameKey>() {
|
||||
public boolean apply(Project.NameKey in) {
|
||||
return a.run(in.get());
|
||||
searcher = new RegexListSearcher<Project.NameKey>(matchRegex) {
|
||||
@Override
|
||||
public String apply(Project.NameKey in) {
|
||||
return in.get();
|
||||
}
|
||||
});
|
||||
};
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new BadRequestException(e.getMessage());
|
||||
}
|
||||
return searcher.search(ImmutableList.copyOf(projectCache.all()));
|
||||
} else {
|
||||
return projectCache.all();
|
||||
}
|
||||
|
||||
@@ -16,76 +16,21 @@ package com.google.gerrit.server.query.change;
|
||||
|
||||
import com.google.gerrit.server.index.ChangeField;
|
||||
import com.google.gerrit.server.index.RegexPredicate;
|
||||
import com.google.gerrit.server.util.RegexListSearcher;
|
||||
import com.google.gwtorm.server.OrmException;
|
||||
|
||||
import dk.brics.automaton.Automaton;
|
||||
import dk.brics.automaton.RegExp;
|
||||
import dk.brics.automaton.RunAutomaton;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
class RegexPathPredicate extends RegexPredicate<ChangeData> {
|
||||
private final RunAutomaton pattern;
|
||||
|
||||
private final String prefixBegin;
|
||||
private final String prefixEnd;
|
||||
private final int prefixLen;
|
||||
private final boolean prefixOnly;
|
||||
|
||||
RegexPathPredicate(String fieldName, String re) {
|
||||
super(ChangeField.PATH, re);
|
||||
|
||||
if (re.startsWith("^")) {
|
||||
re = re.substring(1);
|
||||
}
|
||||
|
||||
if (re.endsWith("$") && !re.endsWith("\\$")) {
|
||||
re = re.substring(0, re.length() - 1);
|
||||
}
|
||||
|
||||
Automaton automaton = new RegExp(re).toAutomaton();
|
||||
prefixBegin = automaton.getCommonPrefix();
|
||||
prefixLen = prefixBegin.length();
|
||||
|
||||
if (0 < prefixLen) {
|
||||
char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1);
|
||||
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
|
||||
prefixOnly = re.equals(prefixBegin + ".*");
|
||||
} else {
|
||||
prefixEnd = "";
|
||||
prefixOnly = false;
|
||||
}
|
||||
|
||||
pattern = prefixOnly ? null : new RunAutomaton(automaton);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(ChangeData object) throws OrmException {
|
||||
List<String> files = object.currentFilePaths();
|
||||
if (files != null) {
|
||||
int begin, end;
|
||||
|
||||
if (0 < prefixLen) {
|
||||
begin = find(files, prefixBegin);
|
||||
end = find(files, prefixEnd);
|
||||
} else {
|
||||
begin = 0;
|
||||
end = files.size();
|
||||
}
|
||||
|
||||
if (prefixOnly) {
|
||||
return begin < end;
|
||||
}
|
||||
|
||||
while (begin < end) {
|
||||
if (pattern.run(files.get(begin++))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
return RegexListSearcher.ofStrings(getValue()).hasMatch(files);
|
||||
} else {
|
||||
// The ChangeData can't do expensive lookups right now. Bypass
|
||||
// them and include the result anyway. We might be able to do
|
||||
@@ -95,11 +40,6 @@ class RegexPathPredicate extends RegexPredicate<ChangeData> {
|
||||
}
|
||||
}
|
||||
|
||||
private static int find(List<String> files, String p) {
|
||||
int r = Collections.binarySearch(files, p);
|
||||
return r < 0 ? -(r + 1) : r;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCost() {
|
||||
return 1;
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
// Copyright (C) 2014 The Android Open Source Project
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.gerrit.server.util;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.primitives.Chars;
|
||||
|
||||
import dk.brics.automaton.Automaton;
|
||||
import dk.brics.automaton.RegExp;
|
||||
import dk.brics.automaton.RunAutomaton;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/** Helper to search sorted lists for elements matching a regex. */
|
||||
public abstract class RegexListSearcher<T> implements Function<T, String> {
|
||||
public static RegexListSearcher<String> ofStrings(String re) {
|
||||
return new RegexListSearcher<String>(re) {
|
||||
@Override
|
||||
public String apply(String in) {
|
||||
return in;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private final RunAutomaton pattern;
|
||||
|
||||
private final String prefixBegin;
|
||||
private final String prefixEnd;
|
||||
private final int prefixLen;
|
||||
private final boolean prefixOnly;
|
||||
|
||||
public RegexListSearcher(String re) {
|
||||
if (re.startsWith("^")) {
|
||||
re = re.substring(1);
|
||||
}
|
||||
|
||||
if (re.endsWith("$") && !re.endsWith("\\$")) {
|
||||
re = re.substring(0, re.length() - 1);
|
||||
}
|
||||
|
||||
Automaton automaton = new RegExp(re).toAutomaton();
|
||||
prefixBegin = automaton.getCommonPrefix();
|
||||
prefixLen = prefixBegin.length();
|
||||
|
||||
if (0 < prefixLen) {
|
||||
char max = Chars.checkedCast(prefixBegin.charAt(prefixLen - 1) + 1);
|
||||
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
|
||||
prefixOnly = re.equals(prefixBegin + ".*");
|
||||
} else {
|
||||
prefixEnd = "";
|
||||
prefixOnly = false;
|
||||
}
|
||||
|
||||
pattern = prefixOnly ? null : new RunAutomaton(automaton);
|
||||
}
|
||||
|
||||
public Iterable<T> search(List<T> list) {
|
||||
checkNotNull(list);
|
||||
int begin, end;
|
||||
|
||||
if (0 < prefixLen) {
|
||||
// Assumes many consecutive elements may have the same prefix, so the cost
|
||||
// of two binary searches is less than iterating to find the endpoints.
|
||||
begin = find(list, prefixBegin);
|
||||
end = find(list, prefixEnd);
|
||||
} else {
|
||||
begin = 0;
|
||||
end = list.size();
|
||||
}
|
||||
|
||||
if (prefixOnly) {
|
||||
return begin < end ? list.subList(begin, end) : ImmutableList.<T> of();
|
||||
}
|
||||
|
||||
return Iterables.filter(
|
||||
list.subList(begin, end),
|
||||
new Predicate<T>() {
|
||||
@Override
|
||||
public boolean apply(T in) {
|
||||
return pattern.run(RegexListSearcher.this.apply(in));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public boolean hasMatch(List<T> list) {
|
||||
return !Iterables.isEmpty(search(list));
|
||||
}
|
||||
|
||||
private int find(List<T> list, String p) {
|
||||
int r = Collections.binarySearch(Lists.transform(list, this), p);
|
||||
return r < 0 ? -(r + 1) : r;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
// Copyright (C) 2014 The Android Open Source Project
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.gerrit.server.util;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Ordering;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class RegexListSearcherTest {
|
||||
private static final List<String> EMPTY = ImmutableList.of();
|
||||
|
||||
@Test
|
||||
public void emptyList() {
|
||||
assertSearchReturns(EMPTY, "pat", EMPTY);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void hasMatch() {
|
||||
List<String> list = ImmutableList.of("bar", "foo", "quux");
|
||||
assertTrue(RegexListSearcher.ofStrings("foo").hasMatch(list));
|
||||
assertFalse(RegexListSearcher.ofStrings("xyz").hasMatch(list));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void anchors() {
|
||||
List<String> list = ImmutableList.of("foo");
|
||||
assertSearchReturns(list, "^f.*", list);
|
||||
assertSearchReturns(list, "^f.*o$", list);
|
||||
assertSearchReturns(list, "f.*o$", list);
|
||||
assertSearchReturns(list, "f.*o$", list);
|
||||
assertSearchReturns(EMPTY, "^.*\\$", list);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void noCommonPrefix() {
|
||||
List<String> list = ImmutableList.of("bar", "foo", "quux");
|
||||
assertSearchReturns(ImmutableList.of("foo"), "f.*", list);
|
||||
assertSearchReturns(ImmutableList.of("foo"), ".*o.*", list);
|
||||
assertSearchReturns(ImmutableList.of("bar", "foo", "quux"), ".*[aou].*",
|
||||
list);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void commonPrefix() {
|
||||
List<String> list = ImmutableList.of(
|
||||
"bar",
|
||||
"baz",
|
||||
"foo1",
|
||||
"foo2",
|
||||
"foo3",
|
||||
"quux");
|
||||
assertSearchReturns(ImmutableList.of("bar", "baz"), "b.*", list);
|
||||
assertSearchReturns(ImmutableList.of("foo1", "foo2"), "foo[12]", list);
|
||||
assertSearchReturns(ImmutableList.of("foo1", "foo2", "foo3"), "foo.*",
|
||||
list);
|
||||
assertSearchReturns(ImmutableList.of("quux"), "q.*", list);
|
||||
}
|
||||
|
||||
private void assertSearchReturns(List<?> expected, String re,
|
||||
List<String> inputs) {
|
||||
assertTrue(Ordering.natural().isOrdered(inputs));
|
||||
assertEquals(expected,
|
||||
ImmutableList.copyOf(RegexListSearcher.ofStrings(re).search(inputs)));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user