Factor out a class for efficient regex search over lists

RegexPathPredicate had some clever logic for avoiding matching regular
expressions over an entire search list when we can identify a common
prefix that all matching results must have. Factor this out into a new
class, RegexListSearcher, add tests, and use it from ListProjects.

Change-Id: Ie08ad8dcf09708a8aa3efcfbed6c4ee6879f80c7
This commit is contained in:
Dave Borowitz
2014-08-07 13:19:23 -07:00
parent 1e0e85045e
commit d4ab000a20
4 changed files with 208 additions and 82 deletions

View File

@@ -16,76 +16,21 @@ package com.google.gerrit.server.query.change;
import com.google.gerrit.server.index.ChangeField;
import com.google.gerrit.server.index.RegexPredicate;
import com.google.gerrit.server.util.RegexListSearcher;
import com.google.gwtorm.server.OrmException;
import dk.brics.automaton.Automaton;
import dk.brics.automaton.RegExp;
import dk.brics.automaton.RunAutomaton;
import java.util.Collections;
import java.util.List;
class RegexPathPredicate extends RegexPredicate<ChangeData> {
private final RunAutomaton pattern;
private final String prefixBegin;
private final String prefixEnd;
private final int prefixLen;
private final boolean prefixOnly;
RegexPathPredicate(String fieldName, String re) {
super(ChangeField.PATH, re);
if (re.startsWith("^")) {
re = re.substring(1);
}
if (re.endsWith("$") && !re.endsWith("\\$")) {
re = re.substring(0, re.length() - 1);
}
Automaton automaton = new RegExp(re).toAutomaton();
prefixBegin = automaton.getCommonPrefix();
prefixLen = prefixBegin.length();
if (0 < prefixLen) {
char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1);
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
prefixOnly = re.equals(prefixBegin + ".*");
} else {
prefixEnd = "";
prefixOnly = false;
}
pattern = prefixOnly ? null : new RunAutomaton(automaton);
}
@Override
public boolean match(ChangeData object) throws OrmException {
List<String> files = object.currentFilePaths();
if (files != null) {
int begin, end;
if (0 < prefixLen) {
begin = find(files, prefixBegin);
end = find(files, prefixEnd);
} else {
begin = 0;
end = files.size();
}
if (prefixOnly) {
return begin < end;
}
while (begin < end) {
if (pattern.run(files.get(begin++))) {
return true;
}
}
return false;
return RegexListSearcher.ofStrings(getValue()).hasMatch(files);
} else {
// The ChangeData can't do expensive lookups right now. Bypass
// them and include the result anyway. We might be able to do
@@ -95,11 +40,6 @@ class RegexPathPredicate extends RegexPredicate<ChangeData> {
}
}
/**
 * Locates {@code p} in {@code files} via {@link Collections#binarySearch}.
 *
 * <p>Assumes {@code files} is sorted in natural order, as binary search requires.
 *
 * @param files the list to search
 * @param p the key to look for
 * @return the index of {@code p} when present, otherwise the index at which it
 *     would be inserted
 */
private static int find(List<String> files, String p) {
  int idx = Collections.binarySearch(files, p);
  if (idx >= 0) {
    return idx;
  }
  // A miss is reported as -(insertionPoint) - 1; undo that encoding.
  return -idx - 1;
}
@Override
public int getCost() {
return 1;