Elasticsearch: Add char analyzer to ensure consistency of query results
When using Elasticsearch, a query on full-text fields that involved the characters "." and "_" did not include results that contain the keywords as a substring. This behavior was different from Lucene, where these two characters are mapped to the space character (" ") so that the query returns keywords separated by them. This change adds character mappings for Elasticsearch in order to ensure that full-text queries return the same results as when using Lucene. At index creation time, this change creates a new Elasticsearch setting in which an analyzer with character mappings is configured. This analyzer is then added to the Elasticsearch mappings to be used by the full-text field queries. Because the Elasticsearch mappings and settings can only be configured at index creation time, one should take the following steps to apply this change: 1. delete the index (changes, accounts, groups). 2. initialize an Elasticsearch site with this change. 3. reindex the documents (changes, accounts, groups). 4. start the site. This change applies to all the currently supported Elasticsearch versions. Bug: Issue 9146 Bug: Issue 9147 Change-Id: I6da7a98d35d912b5bee7cc510d02db4433f25538
This commit is contained in:
parent
0e2e776953
commit
85048f4963
@ -19,6 +19,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.apache.commons.codec.binary.Base64.decodeBase64;
|
||||
|
||||
import com.google.common.collect.FluentIterable;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.io.CharStreams;
|
||||
import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
|
||||
import com.google.gerrit.elasticsearch.builders.SearchSourceBuilder;
|
||||
@ -54,6 +55,7 @@ abstract class AbstractElasticIndex<K, V> implements Index<K, V> {
|
||||
protected static final String MAPPINGS = "mappings";
|
||||
protected static final String ORDER = "order";
|
||||
protected static final String SEARCH = "_search";
|
||||
protected static final String SETTINGS = "settings";
|
||||
|
||||
protected static <T> List<T> decodeProtos(
|
||||
JsonObject doc, String fieldName, ProtobufCodec<T> codec) {
|
||||
@ -156,7 +158,8 @@ abstract class AbstractElasticIndex<K, V> implements Index<K, V> {
|
||||
}
|
||||
|
||||
// Recreate the index.
|
||||
response = performRequest("PUT", getMappings(), indexName, Collections.emptyMap());
|
||||
String indexCreationFields = concatJsonString(getSettings(), getMappings());
|
||||
response = performRequest("PUT", indexCreationFields, indexName, Collections.emptyMap());
|
||||
statusCode = response.getStatusLine().getStatusCode();
|
||||
if (statusCode != HttpStatus.SC_OK) {
|
||||
String error = String.format("Failed to create index %s: %s", indexName, statusCode);
|
||||
@ -168,6 +171,10 @@ abstract class AbstractElasticIndex<K, V> implements Index<K, V> {
|
||||
|
||||
/**
 * Returns the mappings of this index as a JSON object string.
 *
 * <p>When the index is (re)created, the result is joined with the settings JSON and sent as the
 * body of the index-creation request, so it must start with '{' and end with '}'.
 */
protected abstract String getMappings();
|
||||
|
||||
private String getSettings() {
|
||||
return gson.toJson(ImmutableMap.of(SETTINGS, ElasticSetting.createSetting()));
|
||||
}
|
||||
|
||||
/**
 * Returns the string identifier of the given value.
 *
 * <p>NOTE(review): presumably this is the id under which the document is stored in the index;
 * confirm against the concrete implementations.
 */
protected abstract String getId(V v);
|
||||
|
||||
protected String getMappingsForSingleType(String candidateType, MappingProperties properties) {
|
||||
@ -225,6 +232,10 @@ abstract class AbstractElasticIndex<K, V> implements Index<K, V> {
|
||||
return performRequest("POST", payload, uri, params);
|
||||
}
|
||||
|
||||
private String concatJsonString(String target, String addition) {
|
||||
return target.substring(0, target.length() - 1) + "," + addition.substring(1);
|
||||
}
|
||||
|
||||
private Response performRequest(
|
||||
String method, Object payload, String uri, Map<String, String> params) throws IOException {
|
||||
String payloadStr = payload instanceof String ? (String) payload : payload.toString();
|
||||
|
@ -34,9 +34,9 @@ class ElasticMapping {
|
||||
|| fieldType == FieldType.INTEGER_RANGE
|
||||
|| fieldType == FieldType.LONG) {
|
||||
mapping.addNumber(name);
|
||||
} else if (fieldType == FieldType.PREFIX
|
||||
|| fieldType == FieldType.FULL_TEXT
|
||||
|| fieldType == FieldType.STORED_ONLY) {
|
||||
} else if (fieldType == FieldType.FULL_TEXT) {
|
||||
mapping.addStringWithAnalyzer(name);
|
||||
} else if (fieldType == FieldType.PREFIX || fieldType == FieldType.STORED_ONLY) {
|
||||
mapping.addString(name);
|
||||
} else {
|
||||
throw new IllegalStateException("Unsupported field type: " + fieldType.getName());
|
||||
@ -88,6 +88,13 @@ class ElasticMapping {
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder addStringWithAnalyzer(String name) {
|
||||
FieldProperties key = new FieldProperties(adapter.stringFieldType());
|
||||
key.analyzer = "custom_with_char_filter";
|
||||
fields.put(name, key);
|
||||
return this;
|
||||
}
|
||||
|
||||
Builder add(String name, String type) {
|
||||
fields.put(name, new FieldProperties(type));
|
||||
return this;
|
||||
@ -102,6 +109,7 @@ class ElasticMapping {
|
||||
String type;
|
||||
String index;
|
||||
String format;
|
||||
String analyzer;
|
||||
Map<String, FieldProperties> fields;
|
||||
|
||||
FieldProperties(String type) {
|
||||
|
@ -0,0 +1,92 @@
|
||||
// Copyright (C) 2018 The Android Open Source Project
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.gerrit.elasticsearch;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import java.util.Map;
|
||||
|
||||
class ElasticSetting {
|
||||
/** The custom char mappings of "." to " " and "_" to " " in the form of UTF-8 */
|
||||
private static final ImmutableMap<String, String> CUSTOM_CHAR_MAPPING =
|
||||
ImmutableMap.of("\\u002E", "\\u0020", "\\u005F", "\\u0020");
|
||||
|
||||
static SettingProperties createSetting() {
|
||||
ElasticSetting.Builder settings = new ElasticSetting.Builder();
|
||||
settings.addCharFilter();
|
||||
settings.addAnalyzer();
|
||||
return settings.build();
|
||||
}
|
||||
|
||||
static class Builder {
|
||||
private final ImmutableMap.Builder<String, FieldProperties> fields =
|
||||
new ImmutableMap.Builder<>();
|
||||
|
||||
SettingProperties build() {
|
||||
SettingProperties properties = new SettingProperties();
|
||||
properties.analysis = fields.build();
|
||||
return properties;
|
||||
}
|
||||
|
||||
void addCharFilter() {
|
||||
FieldProperties charMapping = new FieldProperties("mapping");
|
||||
charMapping.mappings = getCustomCharMappings(CUSTOM_CHAR_MAPPING);
|
||||
|
||||
FieldProperties charFilter = new FieldProperties();
|
||||
charFilter.customMapping = charMapping;
|
||||
fields.put("char_filter", charFilter);
|
||||
}
|
||||
|
||||
void addAnalyzer() {
|
||||
FieldProperties customAnalyzer = new FieldProperties("custom");
|
||||
customAnalyzer.tokenizer = "standard";
|
||||
customAnalyzer.charFilter = new String[] {"custom_mapping"};
|
||||
customAnalyzer.filter = new String[] {"lowercase"};
|
||||
|
||||
FieldProperties analyzer = new FieldProperties();
|
||||
analyzer.customWithCharFilter = customAnalyzer;
|
||||
fields.put("analyzer", analyzer);
|
||||
}
|
||||
|
||||
private static String[] getCustomCharMappings(ImmutableMap<String, String> map) {
|
||||
int mappingIndex = 0;
|
||||
int numOfMappings = map.size();
|
||||
String[] mapping = new String[numOfMappings];
|
||||
for (Map.Entry<String, String> e : map.entrySet()) {
|
||||
mapping[mappingIndex++] = e.getKey() + "=>" + e.getValue();
|
||||
}
|
||||
return mapping;
|
||||
}
|
||||
}
|
||||
|
||||
static class SettingProperties {
|
||||
Map<String, FieldProperties> analysis;
|
||||
}
|
||||
|
||||
static class FieldProperties {
|
||||
String tokenizer;
|
||||
String type;
|
||||
String[] charFilter;
|
||||
String[] filter;
|
||||
String[] mappings;
|
||||
FieldProperties customMapping;
|
||||
FieldProperties customWithCharFilter;
|
||||
|
||||
FieldProperties() {}
|
||||
|
||||
FieldProperties(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user