Fix sorting of results from Lucene for account, group and project index

The fields that are used for sorting must be added to the document. This
means the documents in the index must be all recomputed, hence we need
new index schema versions for the affected indexes.

The sorting that is done by Lucene for the account index must match the
sorting of accounts that is already done in QueryAccounts before
returning query results to clients. QueryAccounts sorts results by
fullname, preferred email and account ID. If Lucene would do a different
sorting limited queries may return wrong results. E.g. if there are 3
accounts, Foo (ID=1), Bar (ID=2) and Baz (ID=3), which are all matched
by a query for which the results are limited to 2, callers of
QueryAccounts expect Bar and Baz to be returned since QueryAccounts
promises sorting by fullname. If now Lucene would sort by account ID,
Lucene would find Foo and Bar, Baz would be skipped since it's over the
limit. These results are then handed over to QueryAccounts which does
sorting by name so that Bar and Foo are returned to the client. This
would be wrong since the caller expects Bar and Baz. It's a bit unclear
how this worked when Lucene was not doing proper sorting at all, maybe
it was just luck that the withLimit test succeeded before. As Lucene
does the sorting properly now, the sorting in QueryAccounts could be
removed, but since Lucene is not the only index implementation and other
index implementation may still have sorting issues we keep that sorting
in QueryAccounts for now.

Bug: Issue 10210
Change-Id: Ic59e4d330fe8c7198023ddbc2fa946cf5db80b63
Signed-off-by: Edwin Kempin <ekempin@google.com>
(cherry picked from commit 714f7d3c3f)
This commit is contained in:
Edwin Kempin
2018-12-24 10:37:20 +01:00
committed by Luca Milanesio
parent 8d89cb9535
commit f70d14d2ac
9 changed files with 172 additions and 12 deletions

View File

@@ -34,7 +34,10 @@ public class ProjectSchemaDefinitions extends SchemaDefinitions<ProjectData> {
static final Schema<ProjectData> V2 = schema(V1, ProjectField.STATE, ProjectField.REF_STATE);
// Bump Lucene version requires reindexing
static final Schema<ProjectData> V3 = schema(V2);
@Deprecated static final Schema<ProjectData> V3 = schema(V2);
// Lucene index was changed to add an additional field for sorting.
static final Schema<ProjectData> V4 = schema(V3);
public static final ProjectSchemaDefinitions INSTANCE = new ProjectSchemaDefinitions();

View File

@@ -14,10 +14,15 @@
package com.google.gerrit.lucene;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.gerrit.server.index.account.AccountField.FULL_NAME;
import static com.google.gerrit.server.index.account.AccountField.ID;
import static com.google.gerrit.server.index.account.AccountField.PREFERRED_EMAIL_EXACT;
import com.google.gerrit.index.FieldDef;
import com.google.gerrit.index.QueryOptions;
import com.google.gerrit.index.Schema;
import com.google.gerrit.index.Schema.Values;
import com.google.gerrit.index.query.DataSource;
import com.google.gerrit.index.query.Predicate;
import com.google.gerrit.index.query.QueryParseException;
@@ -35,6 +40,8 @@ import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.ExecutionException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.Sort;
@@ -42,12 +49,15 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.eclipse.jgit.lib.Config;
public class LuceneAccountIndex extends AbstractLuceneIndex<Account.Id, AccountState>
implements AccountIndex {
private static final String ACCOUNTS = "accounts";
private static final String FULL_NAME_SORT_FIELD = sortFieldName(FULL_NAME);
private static final String EMAIL_SORT_FIELD = sortFieldName(PREFERRED_EMAIL_EXACT);
private static final String ID_SORT_FIELD = sortFieldName(ID);
private static Term idTerm(AccountState as) {
@@ -92,6 +102,23 @@ public class LuceneAccountIndex extends AbstractLuceneIndex<Account.Id, AccountS
queryBuilder = new QueryBuilder<>(schema, indexWriterConfig.getAnalyzer());
}
@Override
void add(Document doc, Values<AccountState> values) {
// Add separate DocValues fields for those fields needed for sorting.
FieldDef<AccountState, ?> f = values.getField();
if (f == ID) {
int v = (Integer) getOnlyElement(values.getValues());
doc.add(new NumericDocValuesField(ID_SORT_FIELD, v));
} else if (f == FULL_NAME) {
String value = (String) getOnlyElement(values.getValues());
doc.add(new SortedDocValuesField(FULL_NAME_SORT_FIELD, new BytesRef(value)));
} else if (f == PREFERRED_EMAIL_EXACT) {
String value = (String) getOnlyElement(values.getValues());
doc.add(new SortedDocValuesField(EMAIL_SORT_FIELD, new BytesRef(value)));
}
super.add(doc, values);
}
@Override
public void replace(AccountState as) throws IOException {
try {
@@ -114,9 +141,14 @@ public class LuceneAccountIndex extends AbstractLuceneIndex<Account.Id, AccountS
public DataSource<AccountState> getSource(Predicate<AccountState> p, QueryOptions opts)
throws QueryParseException {
return new LuceneQuerySource(
opts.filterFields(IndexUtils::accountFields),
queryBuilder.toQuery(p),
new Sort(new SortField(ID_SORT_FIELD, SortField.Type.LONG, true)));
opts.filterFields(IndexUtils::accountFields), queryBuilder.toQuery(p), getSort());
}
private Sort getSort() {
return new Sort(
new SortField(FULL_NAME_SORT_FIELD, SortField.Type.STRING, false),
new SortField(EMAIL_SORT_FIELD, SortField.Type.STRING, false),
new SortField(ID_SORT_FIELD, SortField.Type.LONG, false));
}
@Override

View File

@@ -14,10 +14,13 @@
package com.google.gerrit.lucene;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.gerrit.server.index.group.GroupField.UUID;
import com.google.gerrit.index.FieldDef;
import com.google.gerrit.index.QueryOptions;
import com.google.gerrit.index.Schema;
import com.google.gerrit.index.Schema.Values;
import com.google.gerrit.index.query.DataSource;
import com.google.gerrit.index.query.Predicate;
import com.google.gerrit.index.query.QueryParseException;
@@ -35,6 +38,7 @@ import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.ExecutionException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.Sort;
@@ -42,6 +46,7 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.eclipse.jgit.lib.Config;
public class LuceneGroupIndex extends AbstractLuceneIndex<AccountGroup.UUID, InternalGroup>
@@ -93,6 +98,17 @@ public class LuceneGroupIndex extends AbstractLuceneIndex<AccountGroup.UUID, Int
queryBuilder = new QueryBuilder<>(schema, indexWriterConfig.getAnalyzer());
}
@Override
void add(Document doc, Values<InternalGroup> values) {
// Add separate DocValues field for the field that is needed for sorting.
FieldDef<InternalGroup, ?> f = values.getField();
if (f == UUID) {
String value = (String) getOnlyElement(values.getValues());
doc.add(new SortedDocValuesField(UUID_SORT_FIELD, new BytesRef(value)));
}
super.add(doc, values);
}
@Override
public void replace(InternalGroup group) throws IOException {
try {

View File

@@ -14,10 +14,13 @@
package com.google.gerrit.lucene;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.gerrit.index.project.ProjectField.NAME;
import com.google.gerrit.index.FieldDef;
import com.google.gerrit.index.QueryOptions;
import com.google.gerrit.index.Schema;
import com.google.gerrit.index.Schema.Values;
import com.google.gerrit.index.project.ProjectData;
import com.google.gerrit.index.project.ProjectIndex;
import com.google.gerrit.index.query.DataSource;
@@ -35,6 +38,7 @@ import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.ExecutionException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.Sort;
@@ -42,6 +46,7 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.eclipse.jgit.lib.Config;
public class LuceneProjectIndex extends AbstractLuceneIndex<Project.NameKey, ProjectData>
@@ -92,6 +97,17 @@ public class LuceneProjectIndex extends AbstractLuceneIndex<Project.NameKey, Pro
queryBuilder = new QueryBuilder<>(schema, indexWriterConfig.getAnalyzer());
}
@Override
void add(Document doc, Values<ProjectData> values) {
// Add separate DocValues field for the field that is needed for sorting.
FieldDef<ProjectData, ?> f = values.getField();
if (f == NAME) {
String value = (String) getOnlyElement(values.getValues());
doc.add(new SortedDocValuesField(NAME_SORT_FIELD, new BytesRef(value)));
}
super.add(doc, values);
}
@Override
public void replace(ProjectData projectState) throws IOException {
try {

View File

@@ -46,7 +46,10 @@ public class AccountSchemaDefinitions extends SchemaDefinitions<AccountState> {
static final Schema<AccountState> V8 = schema(V7, AccountField.NAME_PART_NO_SECONDARY_EMAIL);
// Bump Lucene version requires reindexing
static final Schema<AccountState> V9 = schema(V8);
@Deprecated static final Schema<AccountState> V9 = schema(V8);
// Lucene index was changed to add additional fields for sorting.
static final Schema<AccountState> V10 = schema(V9);
public static final String NAME = "accounts";
public static final AccountSchemaDefinitions INSTANCE = new AccountSchemaDefinitions();

View File

@@ -40,7 +40,10 @@ public class GroupSchemaDefinitions extends SchemaDefinitions<InternalGroup> {
@Deprecated static final Schema<InternalGroup> V5 = schema(V4, GroupField.REF_STATE);
// Bump Lucene version requires reindexing
static final Schema<InternalGroup> V6 = schema(V5);
@Deprecated static final Schema<InternalGroup> V6 = schema(V5);
// Lucene index was changed to add an additional field for sorting.
static final Schema<InternalGroup> V7 = schema(V6);
public static final GroupSchemaDefinitions INSTANCE = new GroupSchemaDefinitions();