Tweak Lucene analyzer's definition of a whole word
When words were joined with '_' or '.', Lucene treated them as a single word. Change this so that Lucene treats them as separate words. The new analyzer wraps StandardAnalyzer and changes its behavior so that '_' and '.' are treated as whitespace. A reindex is necessary. Bug: issue 2822 Change-Id: Ibed25695bf8e60335a2486e5e988a7c67b3da37d
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
// Copyright (C) 2014 The Android Open Source Project
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.gerrit.lucene;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.AnalyzerWrapper;
|
||||
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This analyzer can be used to provide custom char mappings.
|
||||
*
|
||||
* <p>Example usage:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* {@code
|
||||
* Map<String,String> customMapping = new HashMap<>();
|
||||
* customMapping.put("_", " ");
|
||||
* customMapping.put(".", " ");
|
||||
*
|
||||
* CustomMappingAnalyzer analyzer =
|
||||
* new CustomMappingAnalyzer(new StandardAnalyzer(version), customMapping);
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
public class CustomMappingAnalyzer extends AnalyzerWrapper {
|
||||
private Analyzer delegate;
|
||||
private Map<String, String> customMappings;
|
||||
|
||||
public CustomMappingAnalyzer(Analyzer delegate,
|
||||
Map<String, String> customMappings) {
|
||||
super(delegate.getReuseStrategy());
|
||||
this.delegate = delegate;
|
||||
this.customMappings = customMappings;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Analyzer getWrappedAnalyzer(String fieldName) {
|
||||
return delegate;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Reader wrapReader(String fieldName, Reader reader) {
|
||||
NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
|
||||
for (Map.Entry<String, String> e : customMappings.entrySet()) {
|
||||
builder.add(e.getKey(), e.getValue());
|
||||
}
|
||||
return new MappingCharFilter(builder.build(), reader);
|
||||
}
|
||||
}
|
@@ -58,7 +58,6 @@ import com.google.inject.Provider;
|
||||
import com.google.inject.assistedinject.Assisted;
|
||||
import com.google.inject.assistedinject.AssistedInject;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.document.Document;
|
||||
@@ -122,6 +121,8 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
private static final String ID_FIELD = ChangeField.LEGACY_ID.getName();
|
||||
private static final ImmutableSet<String> FIELDS = ImmutableSet.of(
|
||||
ADDED_FIELD, APPROVAL_FIELD, CHANGE_FIELD, DELETED_FIELD, ID_FIELD);
|
||||
private static final Map<String, String> CUSTOM_CHAR_MAPPING = ImmutableMap.of(
|
||||
"_", " ", ".", " ");
|
||||
|
||||
private static final Map<Schema<ChangeData>, Version> LUCENE_VERSIONS;
|
||||
static {
|
||||
@@ -173,8 +174,10 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
private long commitWithinMs;
|
||||
|
||||
private GerritIndexWriterConfig(Version version, Config cfg, String name) {
|
||||
luceneConfig = new IndexWriterConfig(version,
|
||||
new StandardAnalyzer(version, CharArraySet.EMPTY_SET));
|
||||
CustomMappingAnalyzer analyzer =
|
||||
new CustomMappingAnalyzer(new StandardAnalyzer(version,
|
||||
CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING);
|
||||
luceneConfig = new IndexWriterConfig(version, analyzer);
|
||||
luceneConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
||||
double m = 1 << 20;
|
||||
luceneConfig.setRAMBufferSizeMB(cfg.getLong(
|
||||
@@ -237,9 +240,9 @@ public class LuceneChangeIndex implements ChangeIndex {
|
||||
Version luceneVersion = checkNotNull(
|
||||
LUCENE_VERSIONS.get(schema),
|
||||
"unknown Lucene version for index schema: %s", schema);
|
||||
|
||||
Analyzer analyzer =
|
||||
new StandardAnalyzer(luceneVersion, CharArraySet.EMPTY_SET);
|
||||
CustomMappingAnalyzer analyzer =
|
||||
new CustomMappingAnalyzer(new StandardAnalyzer(luceneVersion,
|
||||
CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING);
|
||||
queryBuilder = new QueryBuilder(schema, analyzer);
|
||||
|
||||
GerritIndexWriterConfig openConfig =
|
||||
|
@@ -14,12 +14,39 @@
|
||||
|
||||
package com.google.gerrit.server.query.change;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import com.google.gerrit.reviewdb.client.Change;
|
||||
import com.google.gerrit.testutil.InMemoryModule;
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Injector;
|
||||
|
||||
import org.eclipse.jgit.internal.storage.dfs.InMemoryRepository;
|
||||
import org.eclipse.jgit.junit.TestRepository;
|
||||
import org.eclipse.jgit.revwalk.RevCommit;
|
||||
import org.junit.Test;
|
||||
|
||||
public class LuceneQueryChangesTest extends AbstractQueryChangesTest {
|
||||
protected Injector createInjector() {
|
||||
return Guice.createInjector(new InMemoryModule());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void fullTextWithSpecialChars() throws Exception {
|
||||
TestRepository<InMemoryRepository> repo = createProject("repo");
|
||||
RevCommit commit1 =
|
||||
repo.parseBody(repo.commit().message("foo_bar_foo").create());
|
||||
Change change1 = newChange(repo, commit1, null, null, null).insert();
|
||||
RevCommit commit2 =
|
||||
repo.parseBody(repo.commit().message("one.two.three").create());
|
||||
Change change2 = newChange(repo, commit2, null, null, null).insert();
|
||||
|
||||
assertTrue(query("message:foo_ba").isEmpty());
|
||||
assertResultEquals(change1, queryOne("message:bar"));
|
||||
assertResultEquals(change1, queryOne("message:foo_bar"));
|
||||
assertResultEquals(change1, queryOne("message:foo bar"));
|
||||
assertResultEquals(change2, queryOne("message:two"));
|
||||
assertResultEquals(change2, queryOne("message:one.two"));
|
||||
assertResultEquals(change2, queryOne("message:one two"));
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user