Factor out a class for efficient regex search over lists
RegexPathPredicate had some clever logic for avoiding matching regular expressions over an entire search list when we can identify a common prefix that all matching results must have. Factor this out into a new class, RegexListSearcher, add tests, and use it from ListProjects.

Change-Id: Ie08ad8dcf09708a8aa3efcfbed6c4ee6879f80c7
This commit is contained in:
@@ -16,6 +16,7 @@ package com.google.gerrit.server.project;
|
|||||||
|
|
||||||
import com.google.common.base.Predicate;
|
import com.google.common.base.Predicate;
|
||||||
import com.google.common.base.Strings;
|
import com.google.common.base.Strings;
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
@@ -38,14 +39,12 @@ import com.google.gerrit.server.WebLinks;
|
|||||||
import com.google.gerrit.server.account.GroupCache;
|
import com.google.gerrit.server.account.GroupCache;
|
||||||
import com.google.gerrit.server.account.GroupControl;
|
import com.google.gerrit.server.account.GroupControl;
|
||||||
import com.google.gerrit.server.git.GitRepositoryManager;
|
import com.google.gerrit.server.git.GitRepositoryManager;
|
||||||
|
import com.google.gerrit.server.util.RegexListSearcher;
|
||||||
import com.google.gerrit.server.util.TreeFormatter;
|
import com.google.gerrit.server.util.TreeFormatter;
|
||||||
import com.google.gson.reflect.TypeToken;
|
import com.google.gson.reflect.TypeToken;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Provider;
|
import com.google.inject.Provider;
|
||||||
|
|
||||||
import dk.brics.automaton.RegExp;
|
|
||||||
import dk.brics.automaton.RunAutomaton;
|
|
||||||
|
|
||||||
import org.eclipse.jgit.errors.RepositoryNotFoundException;
|
import org.eclipse.jgit.errors.RepositoryNotFoundException;
|
||||||
import org.eclipse.jgit.lib.Constants;
|
import org.eclipse.jgit.lib.Constants;
|
||||||
import org.eclipse.jgit.lib.Ref;
|
import org.eclipse.jgit.lib.Ref;
|
||||||
@@ -462,27 +461,18 @@ public class ListProjects implements RestReadView<TopLevelResource> {
|
|||||||
});
|
});
|
||||||
} else if (matchRegex != null) {
|
} else if (matchRegex != null) {
|
||||||
checkMatchOptions(matchPrefix == null && matchSubstring == null);
|
checkMatchOptions(matchPrefix == null && matchSubstring == null);
|
||||||
if (matchRegex.startsWith("^")) {
|
RegexListSearcher<Project.NameKey> searcher;
|
||||||
matchRegex = matchRegex.substring(1);
|
|
||||||
}
|
|
||||||
if (matchRegex.endsWith("$") && !matchRegex.endsWith("\\$")) {
|
|
||||||
matchRegex = matchRegex.substring(0, matchRegex.length() - 1);
|
|
||||||
}
|
|
||||||
if (matchRegex.equals(".*")) {
|
|
||||||
return projectCache.all();
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
final RunAutomaton a =
|
searcher = new RegexListSearcher<Project.NameKey>(matchRegex) {
|
||||||
new RunAutomaton(new RegExp(matchRegex).toAutomaton());
|
@Override
|
||||||
return Iterables.filter(projectCache.all(),
|
public String apply(Project.NameKey in) {
|
||||||
new Predicate<Project.NameKey>() {
|
return in.get();
|
||||||
public boolean apply(Project.NameKey in) {
|
|
||||||
return a.run(in.get());
|
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
throw new BadRequestException(e.getMessage());
|
throw new BadRequestException(e.getMessage());
|
||||||
}
|
}
|
||||||
|
return searcher.search(ImmutableList.copyOf(projectCache.all()));
|
||||||
} else {
|
} else {
|
||||||
return projectCache.all();
|
return projectCache.all();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,76 +16,21 @@ package com.google.gerrit.server.query.change;
|
|||||||
|
|
||||||
import com.google.gerrit.server.index.ChangeField;
|
import com.google.gerrit.server.index.ChangeField;
|
||||||
import com.google.gerrit.server.index.RegexPredicate;
|
import com.google.gerrit.server.index.RegexPredicate;
|
||||||
|
import com.google.gerrit.server.util.RegexListSearcher;
|
||||||
import com.google.gwtorm.server.OrmException;
|
import com.google.gwtorm.server.OrmException;
|
||||||
|
|
||||||
import dk.brics.automaton.Automaton;
|
|
||||||
import dk.brics.automaton.RegExp;
|
|
||||||
import dk.brics.automaton.RunAutomaton;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
class RegexPathPredicate extends RegexPredicate<ChangeData> {
|
class RegexPathPredicate extends RegexPredicate<ChangeData> {
|
||||||
private final RunAutomaton pattern;
|
|
||||||
|
|
||||||
private final String prefixBegin;
|
|
||||||
private final String prefixEnd;
|
|
||||||
private final int prefixLen;
|
|
||||||
private final boolean prefixOnly;
|
|
||||||
|
|
||||||
RegexPathPredicate(String fieldName, String re) {
|
RegexPathPredicate(String fieldName, String re) {
|
||||||
super(ChangeField.PATH, re);
|
super(ChangeField.PATH, re);
|
||||||
|
|
||||||
if (re.startsWith("^")) {
|
|
||||||
re = re.substring(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (re.endsWith("$") && !re.endsWith("\\$")) {
|
|
||||||
re = re.substring(0, re.length() - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
Automaton automaton = new RegExp(re).toAutomaton();
|
|
||||||
prefixBegin = automaton.getCommonPrefix();
|
|
||||||
prefixLen = prefixBegin.length();
|
|
||||||
|
|
||||||
if (0 < prefixLen) {
|
|
||||||
char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1);
|
|
||||||
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
|
|
||||||
prefixOnly = re.equals(prefixBegin + ".*");
|
|
||||||
} else {
|
|
||||||
prefixEnd = "";
|
|
||||||
prefixOnly = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
pattern = prefixOnly ? null : new RunAutomaton(automaton);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean match(ChangeData object) throws OrmException {
|
public boolean match(ChangeData object) throws OrmException {
|
||||||
List<String> files = object.currentFilePaths();
|
List<String> files = object.currentFilePaths();
|
||||||
if (files != null) {
|
if (files != null) {
|
||||||
int begin, end;
|
return RegexListSearcher.ofStrings(getValue()).hasMatch(files);
|
||||||
|
|
||||||
if (0 < prefixLen) {
|
|
||||||
begin = find(files, prefixBegin);
|
|
||||||
end = find(files, prefixEnd);
|
|
||||||
} else {
|
|
||||||
begin = 0;
|
|
||||||
end = files.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (prefixOnly) {
|
|
||||||
return begin < end;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (begin < end) {
|
|
||||||
if (pattern.run(files.get(begin++))) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// The ChangeData can't do expensive lookups right now. Bypass
|
// The ChangeData can't do expensive lookups right now. Bypass
|
||||||
// them and include the result anyway. We might be able to do
|
// them and include the result anyway. We might be able to do
|
||||||
@@ -95,11 +40,6 @@ class RegexPathPredicate extends RegexPredicate<ChangeData> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int find(List<String> files, String p) {
|
|
||||||
int r = Collections.binarySearch(files, p);
|
|
||||||
return r < 0 ? -(r + 1) : r;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getCost() {
|
public int getCost() {
|
||||||
return 1;
|
return 1;
|
||||||
|
|||||||
@@ -0,0 +1,112 @@
|
|||||||
|
// Copyright (C) 2014 The Android Open Source Project
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package com.google.gerrit.server.util;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkNotNull;
|
||||||
|
|
||||||
|
import com.google.common.base.Function;
|
||||||
|
import com.google.common.base.Predicate;
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.primitives.Chars;
|
||||||
|
|
||||||
|
import dk.brics.automaton.Automaton;
|
||||||
|
import dk.brics.automaton.RegExp;
|
||||||
|
import dk.brics.automaton.RunAutomaton;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** Helper to search sorted lists for elements matching a regex. */
|
||||||
|
public abstract class RegexListSearcher<T> implements Function<T, String> {
|
||||||
|
public static RegexListSearcher<String> ofStrings(String re) {
|
||||||
|
return new RegexListSearcher<String>(re) {
|
||||||
|
@Override
|
||||||
|
public String apply(String in) {
|
||||||
|
return in;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private final RunAutomaton pattern;
|
||||||
|
|
||||||
|
private final String prefixBegin;
|
||||||
|
private final String prefixEnd;
|
||||||
|
private final int prefixLen;
|
||||||
|
private final boolean prefixOnly;
|
||||||
|
|
||||||
|
public RegexListSearcher(String re) {
|
||||||
|
if (re.startsWith("^")) {
|
||||||
|
re = re.substring(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (re.endsWith("$") && !re.endsWith("\\$")) {
|
||||||
|
re = re.substring(0, re.length() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Automaton automaton = new RegExp(re).toAutomaton();
|
||||||
|
prefixBegin = automaton.getCommonPrefix();
|
||||||
|
prefixLen = prefixBegin.length();
|
||||||
|
|
||||||
|
if (0 < prefixLen) {
|
||||||
|
char max = Chars.checkedCast(prefixBegin.charAt(prefixLen - 1) + 1);
|
||||||
|
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
|
||||||
|
prefixOnly = re.equals(prefixBegin + ".*");
|
||||||
|
} else {
|
||||||
|
prefixEnd = "";
|
||||||
|
prefixOnly = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern = prefixOnly ? null : new RunAutomaton(automaton);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterable<T> search(List<T> list) {
|
||||||
|
checkNotNull(list);
|
||||||
|
int begin, end;
|
||||||
|
|
||||||
|
if (0 < prefixLen) {
|
||||||
|
// Assumes many consecutive elements may have the same prefix, so the cost
|
||||||
|
// of two binary searches is less than iterating to find the endpoints.
|
||||||
|
begin = find(list, prefixBegin);
|
||||||
|
end = find(list, prefixEnd);
|
||||||
|
} else {
|
||||||
|
begin = 0;
|
||||||
|
end = list.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prefixOnly) {
|
||||||
|
return begin < end ? list.subList(begin, end) : ImmutableList.<T> of();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Iterables.filter(
|
||||||
|
list.subList(begin, end),
|
||||||
|
new Predicate<T>() {
|
||||||
|
@Override
|
||||||
|
public boolean apply(T in) {
|
||||||
|
return pattern.run(RegexListSearcher.this.apply(in));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasMatch(List<T> list) {
|
||||||
|
return !Iterables.isEmpty(search(list));
|
||||||
|
}
|
||||||
|
|
||||||
|
private int find(List<T> list, String p) {
|
||||||
|
int r = Collections.binarySearch(Lists.transform(list, this), p);
|
||||||
|
return r < 0 ? -(r + 1) : r;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (C) 2014 The Android Open Source Project
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package com.google.gerrit.server.util;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.Ordering;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class RegexListSearcherTest {
|
||||||
|
private static final List<String> EMPTY = ImmutableList.of();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void emptyList() {
|
||||||
|
assertSearchReturns(EMPTY, "pat", EMPTY);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void hasMatch() {
|
||||||
|
List<String> list = ImmutableList.of("bar", "foo", "quux");
|
||||||
|
assertTrue(RegexListSearcher.ofStrings("foo").hasMatch(list));
|
||||||
|
assertFalse(RegexListSearcher.ofStrings("xyz").hasMatch(list));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void anchors() {
|
||||||
|
List<String> list = ImmutableList.of("foo");
|
||||||
|
assertSearchReturns(list, "^f.*", list);
|
||||||
|
assertSearchReturns(list, "^f.*o$", list);
|
||||||
|
assertSearchReturns(list, "f.*o$", list);
|
||||||
|
assertSearchReturns(list, "f.*o$", list);
|
||||||
|
assertSearchReturns(EMPTY, "^.*\\$", list);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void noCommonPrefix() {
|
||||||
|
List<String> list = ImmutableList.of("bar", "foo", "quux");
|
||||||
|
assertSearchReturns(ImmutableList.of("foo"), "f.*", list);
|
||||||
|
assertSearchReturns(ImmutableList.of("foo"), ".*o.*", list);
|
||||||
|
assertSearchReturns(ImmutableList.of("bar", "foo", "quux"), ".*[aou].*",
|
||||||
|
list);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void commonPrefix() {
|
||||||
|
List<String> list = ImmutableList.of(
|
||||||
|
"bar",
|
||||||
|
"baz",
|
||||||
|
"foo1",
|
||||||
|
"foo2",
|
||||||
|
"foo3",
|
||||||
|
"quux");
|
||||||
|
assertSearchReturns(ImmutableList.of("bar", "baz"), "b.*", list);
|
||||||
|
assertSearchReturns(ImmutableList.of("foo1", "foo2"), "foo[12]", list);
|
||||||
|
assertSearchReturns(ImmutableList.of("foo1", "foo2", "foo3"), "foo.*",
|
||||||
|
list);
|
||||||
|
assertSearchReturns(ImmutableList.of("quux"), "q.*", list);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertSearchReturns(List<?> expected, String re,
|
||||||
|
List<String> inputs) {
|
||||||
|
assertTrue(Ordering.natural().isOrdered(inputs));
|
||||||
|
assertEquals(expected,
|
||||||
|
ImmutableList.copyOf(RegexListSearcher.ofStrings(re).search(inputs)));
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user