Optimize RegexFilePredicate for common matches

A common pattern is file:^some/directory/.*, which we can efficiently
match by sorting the paths and using the common prefix to narrow our
search space to only the portion that has a chance of matching with us.

Change-Id: I12cad5e5d5e8af3e1fe902b1e70828491e5cc048
Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
Shawn O. Pearce
2010-08-21 15:37:57 -07:00
parent 3b175d5800
commit 8c11f0c458
4 changed files with 148 additions and 16 deletions

View File

@@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.RepositoryNotFoundException;
import org.eclipse.jgit.lib.Repository;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -54,7 +55,9 @@ public class CommentSender extends ReplyToChangeSender {
paths.add(p.getFileName());
}
}
changeData.setCurrentFilePaths(paths);
String[] names = paths.toArray(new String[paths.size()]);
Arrays.sort(names);
changeData.setCurrentFilePaths(names);
}
@Override

View File

@@ -29,6 +29,7 @@ import com.google.gwtorm.client.OrmException;
import com.google.inject.Provider;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -39,7 +40,7 @@ public class ChangeData {
private Collection<PatchSet> patches;
private Collection<PatchSetApproval> approvals;
private Collection<PatchSetApproval> currentApprovals;
private Collection<String> currentFiles;
private String[] currentFiles;
private Collection<PatchLineComment> comments;
private Collection<TrackingId> trackingIds;
private CurrentUser visibleTo;
@@ -53,11 +54,11 @@ public class ChangeData {
change = c;
}
public void setCurrentFilePaths(Collection<String> filePaths) {
public void setCurrentFilePaths(String[] filePaths) {
currentFiles = filePaths;
}
public Collection<String> currentFilePaths(Provider<ReviewDb> db,
public String[] currentFilePaths(Provider<ReviewDb> db,
PatchListCache cache) throws OrmException {
if (currentFiles == null) {
Change c = change(db);
@@ -88,7 +89,8 @@ public class ChangeData {
break;
}
}
currentFiles = r;
currentFiles = r.toArray(new String[r.size()]);
Arrays.sort(currentFiles);
}
return currentFiles;
}

View File

@@ -20,35 +20,75 @@ import com.google.gerrit.server.query.OperatorPredicate;
import com.google.gwtorm.client.OrmException;
import com.google.inject.Provider;
import java.util.Collection;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import dk.brics.automaton.Automaton;
import dk.brics.automaton.RegExp;
import dk.brics.automaton.RunAutomaton;
import java.util.Arrays;
class RegexFilePredicate extends OperatorPredicate<ChangeData> {
private final Provider<ReviewDb> db;
private final PatchListCache cache;
private final Pattern pattern;
private final RunAutomaton pattern;
private final String prefixBegin;
private final String prefixEnd;
private final int prefixLen;
private final boolean prefixOnly;
RegexFilePredicate(Provider<ReviewDb> db, PatchListCache plc, String re) {
super(ChangeQueryBuilder.FIELD_FILE, re);
this.db = db;
this.cache = plc;
try {
this.pattern = Pattern.compile(re);
} catch (PatternSyntaxException e) {
throw new IllegalArgumentException(e.getMessage());
if (re.startsWith("^")) {
re = re.substring(1);
}
if (re.endsWith("$") && !re.endsWith("\\$")) {
re = re.substring(0, re.length() - 1);
}
Automaton automaton = new RegExp(re).toAutomaton();
prefixBegin = automaton.getCommonPrefix();
prefixLen = prefixBegin.length();
if (0 < prefixLen) {
char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1);
prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
prefixOnly = re.equals(prefixBegin + ".*");
} else {
prefixEnd = "";
prefixOnly = false;
}
pattern = prefixOnly ? null : new RunAutomaton(automaton);
}
@Override
public boolean match(ChangeData object) throws OrmException {
Collection<String> files = object.currentFilePaths(db, cache);
String[] files = object.currentFilePaths(db, cache);
if (files != null) {
for (String path : files) {
if (pattern.matcher(path).find()) {
int begin, end;
if (0 < prefixLen) {
begin = find(files, prefixBegin);
end = find(files, prefixEnd);
} else {
begin = 0;
end = files.length;
}
if (prefixOnly) {
return begin < end;
}
while (begin < end) {
if (pattern.run(files[begin++])) {
return true;
}
}
return false;
} else {
@@ -60,6 +100,11 @@ class RegexFilePredicate extends OperatorPredicate<ChangeData> {
}
}
private static int find(String[] files, String p) {
int r = Arrays.binarySearch(files, p);
return r < 0 ? -(r + 1) : r;
}
@Override
public int getCost() {
return 1;

View File

@@ -0,0 +1,82 @@
// Copyright (C) 2010 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.query.change;
import com.google.gerrit.reviewdb.Change;
import com.google.gwtorm.client.OrmException;
import junit.framework.TestCase;
import java.util.Arrays;
public class RegexFilePredicateTest extends TestCase {
public void testPrefixOnlyOptimization() throws OrmException {
RegexFilePredicate p = predicate("^a/b/.*");
assertTrue(p.match(change("a/b/source.c")));
assertFalse(p.match(change("source.c")));
assertTrue(p.match(change("a/b/source.c", "a/c/test")));
assertFalse(p.match(change("a/bb/source.c")));
}
public void testPrefixReducesSearchSpace() throws OrmException {
RegexFilePredicate p = predicate("^a/b/.*\\.[ch]");
assertTrue(p.match(change("a/b/source.c")));
assertFalse(p.match(change("a/b/source.res")));
assertFalse(p.match(change("source.res")));
assertTrue(p.match(change("a/b/a.a", "a/b/a.d", "a/b/a.h")));
}
public void testFileExtension_Constant() throws OrmException {
RegexFilePredicate p = predicate("^.*\\.res");
assertTrue(p.match(change("test.res")));
assertTrue(p.match(change("foo/bar/test.res")));
assertFalse(p.match(change("test.res.bar")));
}
public void testFileExtension_CharacterGroup() throws OrmException {
RegexFilePredicate p = predicate("^.*\\.[ch]");
assertTrue(p.match(change("test.c")));
assertTrue(p.match(change("test.h")));
assertFalse(p.match(change("test.C")));
}
public void testEndOfString() throws OrmException {
assertTrue(predicate("^a$").match(change("a")));
assertFalse(predicate("^a$").match(change("a$")));
assertFalse(predicate("^a\\$").match(change("a")));
assertTrue(predicate("^a\\$").match(change("a$")));
}
public void testExactMatch() throws OrmException {
RegexFilePredicate p = predicate("^foo.c");
assertTrue(p.match(change("foo.c")));
assertFalse(p.match(change("foo.cc")));
assertFalse(p.match(change("bar.c")));
}
private static RegexFilePredicate predicate(String pattern) {
return new RegexFilePredicate(null, null, pattern);
}
private static ChangeData change(String... files) {
Arrays.sort(files);
ChangeData cd = new ChangeData(new Change.Id(1));
cd.setCurrentFilePaths(files);
return cd;
}
}