Optimize RegexFilePredicate for common matches
A common pattern is file:^some/directory/.*, which we can efficiently match by sorting the paths and using the common prefix to narrow our search space to only the portion that has a chance of matching with us. Change-Id: I12cad5e5d5e8af3e1fe902b1e70828491e5cc048 Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
@@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.RepositoryNotFoundException;
|
||||
import org.eclipse.jgit.lib.Repository;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
@@ -54,7 +55,9 @@ public class CommentSender extends ReplyToChangeSender {
|
||||
paths.add(p.getFileName());
|
||||
}
|
||||
}
|
||||
changeData.setCurrentFilePaths(paths);
|
||||
String[] names = paths.toArray(new String[paths.size()]);
|
||||
Arrays.sort(names);
|
||||
changeData.setCurrentFilePaths(names);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -29,6 +29,7 @@ import com.google.gwtorm.client.OrmException;
|
||||
import com.google.inject.Provider;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
@@ -39,7 +40,7 @@ public class ChangeData {
|
||||
private Collection<PatchSet> patches;
|
||||
private Collection<PatchSetApproval> approvals;
|
||||
private Collection<PatchSetApproval> currentApprovals;
|
||||
private Collection<String> currentFiles;
|
||||
private String[] currentFiles;
|
||||
private Collection<PatchLineComment> comments;
|
||||
private Collection<TrackingId> trackingIds;
|
||||
private CurrentUser visibleTo;
|
||||
@@ -53,11 +54,11 @@ public class ChangeData {
|
||||
change = c;
|
||||
}
|
||||
|
||||
public void setCurrentFilePaths(Collection<String> filePaths) {
|
||||
public void setCurrentFilePaths(String[] filePaths) {
|
||||
currentFiles = filePaths;
|
||||
}
|
||||
|
||||
public Collection<String> currentFilePaths(Provider<ReviewDb> db,
|
||||
public String[] currentFilePaths(Provider<ReviewDb> db,
|
||||
PatchListCache cache) throws OrmException {
|
||||
if (currentFiles == null) {
|
||||
Change c = change(db);
|
||||
@@ -88,7 +89,8 @@ public class ChangeData {
|
||||
break;
|
||||
}
|
||||
}
|
||||
currentFiles = r;
|
||||
currentFiles = r.toArray(new String[r.size()]);
|
||||
Arrays.sort(currentFiles);
|
||||
}
|
||||
return currentFiles;
|
||||
}
|
||||
|
@@ -20,35 +20,75 @@ import com.google.gerrit.server.query.OperatorPredicate;
|
||||
import com.google.gwtorm.client.OrmException;
|
||||
import com.google.inject.Provider;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
import dk.brics.automaton.Automaton;
|
||||
import dk.brics.automaton.RegExp;
|
||||
import dk.brics.automaton.RunAutomaton;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
class RegexFilePredicate extends OperatorPredicate<ChangeData> {
|
||||
private final Provider<ReviewDb> db;
|
||||
private final PatchListCache cache;
|
||||
private final Pattern pattern;
|
||||
private final RunAutomaton pattern;
|
||||
|
||||
private final String prefixBegin;
|
||||
private final String prefixEnd;
|
||||
private final int prefixLen;
|
||||
private final boolean prefixOnly;
|
||||
|
||||
RegexFilePredicate(Provider<ReviewDb> db, PatchListCache plc, String re) {
|
||||
super(ChangeQueryBuilder.FIELD_FILE, re);
|
||||
this.db = db;
|
||||
this.cache = plc;
|
||||
try {
|
||||
this.pattern = Pattern.compile(re);
|
||||
} catch (PatternSyntaxException e) {
|
||||
throw new IllegalArgumentException(e.getMessage());
|
||||
|
||||
if (re.startsWith("^")) {
|
||||
re = re.substring(1);
|
||||
}
|
||||
|
||||
if (re.endsWith("$") && !re.endsWith("\\$")) {
|
||||
re = re.substring(0, re.length() - 1);
|
||||
}
|
||||
|
||||
Automaton automaton = new RegExp(re).toAutomaton();
|
||||
prefixBegin = automaton.getCommonPrefix();
|
||||
prefixLen = prefixBegin.length();
|
||||
|
||||
if (0 < prefixLen) {
|
||||
char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1);
|
||||
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
|
||||
prefixOnly = re.equals(prefixBegin + ".*");
|
||||
} else {
|
||||
prefixEnd = "";
|
||||
prefixOnly = false;
|
||||
}
|
||||
|
||||
pattern = prefixOnly ? null : new RunAutomaton(automaton);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(ChangeData object) throws OrmException {
|
||||
Collection<String> files = object.currentFilePaths(db, cache);
|
||||
String[] files = object.currentFilePaths(db, cache);
|
||||
if (files != null) {
|
||||
for (String path : files) {
|
||||
if (pattern.matcher(path).find()) {
|
||||
int begin, end;
|
||||
|
||||
if (0 < prefixLen) {
|
||||
begin = find(files, prefixBegin);
|
||||
end = find(files, prefixEnd);
|
||||
} else {
|
||||
begin = 0;
|
||||
end = files.length;
|
||||
}
|
||||
|
||||
if (prefixOnly) {
|
||||
return begin < end;
|
||||
}
|
||||
|
||||
while (begin < end) {
|
||||
if (pattern.run(files[begin++])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
} else {
|
||||
@@ -60,6 +100,11 @@ class RegexFilePredicate extends OperatorPredicate<ChangeData> {
|
||||
}
|
||||
}
|
||||
|
||||
private static int find(String[] files, String p) {
|
||||
int r = Arrays.binarySearch(files, p);
|
||||
return r < 0 ? -(r + 1) : r;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCost() {
|
||||
return 1;
|
||||
|
@@ -0,0 +1,82 @@
|
||||
// Copyright (C) 2010 The Android Open Source Project
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.gerrit.server.query.change;
|
||||
|
||||
import com.google.gerrit.reviewdb.Change;
|
||||
import com.google.gwtorm.client.OrmException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class RegexFilePredicateTest extends TestCase {
|
||||
public void testPrefixOnlyOptimization() throws OrmException {
|
||||
RegexFilePredicate p = predicate("^a/b/.*");
|
||||
assertTrue(p.match(change("a/b/source.c")));
|
||||
assertFalse(p.match(change("source.c")));
|
||||
|
||||
assertTrue(p.match(change("a/b/source.c", "a/c/test")));
|
||||
assertFalse(p.match(change("a/bb/source.c")));
|
||||
}
|
||||
|
||||
public void testPrefixReducesSearchSpace() throws OrmException {
|
||||
RegexFilePredicate p = predicate("^a/b/.*\\.[ch]");
|
||||
assertTrue(p.match(change("a/b/source.c")));
|
||||
assertFalse(p.match(change("a/b/source.res")));
|
||||
assertFalse(p.match(change("source.res")));
|
||||
|
||||
assertTrue(p.match(change("a/b/a.a", "a/b/a.d", "a/b/a.h")));
|
||||
}
|
||||
|
||||
public void testFileExtension_Constant() throws OrmException {
|
||||
RegexFilePredicate p = predicate("^.*\\.res");
|
||||
assertTrue(p.match(change("test.res")));
|
||||
assertTrue(p.match(change("foo/bar/test.res")));
|
||||
assertFalse(p.match(change("test.res.bar")));
|
||||
}
|
||||
|
||||
public void testFileExtension_CharacterGroup() throws OrmException {
|
||||
RegexFilePredicate p = predicate("^.*\\.[ch]");
|
||||
assertTrue(p.match(change("test.c")));
|
||||
assertTrue(p.match(change("test.h")));
|
||||
assertFalse(p.match(change("test.C")));
|
||||
}
|
||||
|
||||
public void testEndOfString() throws OrmException {
|
||||
assertTrue(predicate("^a$").match(change("a")));
|
||||
assertFalse(predicate("^a$").match(change("a$")));
|
||||
|
||||
assertFalse(predicate("^a\\$").match(change("a")));
|
||||
assertTrue(predicate("^a\\$").match(change("a$")));
|
||||
}
|
||||
|
||||
public void testExactMatch() throws OrmException {
|
||||
RegexFilePredicate p = predicate("^foo.c");
|
||||
assertTrue(p.match(change("foo.c")));
|
||||
assertFalse(p.match(change("foo.cc")));
|
||||
assertFalse(p.match(change("bar.c")));
|
||||
}
|
||||
|
||||
private static RegexFilePredicate predicate(String pattern) {
|
||||
return new RegexFilePredicate(null, null, pattern);
|
||||
}
|
||||
|
||||
private static ChangeData change(String... files) {
|
||||
Arrays.sort(files);
|
||||
ChangeData cd = new ChangeData(new Change.Id(1));
|
||||
cd.setCurrentFilePaths(files);
|
||||
return cd;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user