
Eventually this will no longer be part of the serialized change proto, and even with the cache it is still expensive to check mergeability for each change in a list of search results, as the submit type must still be checked in order to look up the mergeability.

The new MERGEABLE field now loads from the cache, although ChangeJson and several other callers still depend on the field in Change. This will facilitate index schema upgrades, in that a full reindex will also populate the persistent cache.

While we're at it, upgrade Lucene to 4.10.1, which contains some important stability bugfixes[1].

[1] http://lucene.apache.org/core/4_10_1/changes/Changes.html#v4.10.1

Change-Id: I166b85f91bd596a3f0295616c2e72853b692dd54
173 lines
5.8 KiB
Java
173 lines
5.8 KiB
Java
// Copyright (C) 2013 The Android Open Source Project
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
import com.google.gerrit.server.documentation.Constants;
|
|
|
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
import org.apache.lucene.analysis.util.CharArraySet;
|
|
import org.apache.lucene.document.Document;
|
|
import org.apache.lucene.document.Field;
|
|
import org.apache.lucene.document.StringField;
|
|
import org.apache.lucene.document.TextField;
|
|
import org.apache.lucene.index.IndexWriter;
|
|
import org.apache.lucene.index.IndexWriterConfig;
|
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
import org.apache.lucene.store.IndexInput;
|
|
import org.apache.lucene.store.RAMDirectory;
|
|
import org.apache.lucene.util.Version;
|
|
import org.kohsuke.args4j.Argument;
|
|
import org.kohsuke.args4j.CmdLineException;
|
|
import org.kohsuke.args4j.CmdLineParser;
|
|
import org.kohsuke.args4j.Option;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileNotFoundException;
|
|
import java.io.FileOutputStream;
|
|
import java.io.FileReader;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.jar.JarEntry;
|
|
import java.util.jar.JarOutputStream;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
public class DocIndexer {
|
|
private static final Version LUCENE_VERSION = Version.LUCENE_4_10_1;
|
|
private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)");
|
|
|
|
@Option(name = "-o", usage = "output JAR file")
|
|
private String outFile;
|
|
|
|
@Option(name = "--prefix", usage = "prefix for the html filepath")
|
|
private String prefix = "";
|
|
|
|
@Option(name = "--in-ext", usage = "extension for input files")
|
|
private String inExt = ".txt";
|
|
|
|
@Option(name = "--out-ext", usage = "extension for output files")
|
|
private String outExt = ".html";
|
|
|
|
@Argument(usage = "input files")
|
|
private List<String> inputFiles = new ArrayList<>();
|
|
|
|
private void invoke(String... parameters) throws IOException {
|
|
CmdLineParser parser = new CmdLineParser(this);
|
|
try {
|
|
parser.parseArgument(parameters);
|
|
if (inputFiles.isEmpty()) {
|
|
throw new CmdLineException(parser, "FAILED: input file missing");
|
|
}
|
|
} catch (CmdLineException e) {
|
|
System.err.println(e.getMessage());
|
|
parser.printUsage(System.err);
|
|
System.exit(1);
|
|
return;
|
|
}
|
|
|
|
byte[] compressedIndex = zip(index());
|
|
JarOutputStream jar = new JarOutputStream(new FileOutputStream(outFile));
|
|
JarEntry entry = new JarEntry(
|
|
String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP));
|
|
entry.setSize(compressedIndex.length);
|
|
jar.putNextEntry(entry);
|
|
jar.write(compressedIndex);
|
|
jar.closeEntry();
|
|
jar.close();
|
|
}
|
|
|
|
private RAMDirectory index() throws IOException,
|
|
UnsupportedEncodingException, FileNotFoundException {
|
|
RAMDirectory directory = new RAMDirectory();
|
|
IndexWriterConfig config = new IndexWriterConfig(
|
|
LUCENE_VERSION,
|
|
new StandardAnalyzer(CharArraySet.EMPTY_SET));
|
|
config.setOpenMode(OpenMode.CREATE);
|
|
IndexWriter iwriter = new IndexWriter(directory, config);
|
|
for (String inputFile : inputFiles) {
|
|
File file = new File(inputFile);
|
|
if (file.length() == 0) {
|
|
continue;
|
|
}
|
|
|
|
BufferedReader titleReader = new BufferedReader(
|
|
new InputStreamReader(new FileInputStream(file), "UTF-8"));
|
|
String title = titleReader.readLine();
|
|
if (title != null && title.startsWith("[[")) {
|
|
// Generally the first line of the txt is the title. In a few cases the
|
|
// first line is a "[[tag]]" and the second line is the title.
|
|
title = titleReader.readLine();
|
|
}
|
|
titleReader.close();
|
|
Matcher matcher = SECTION_HEADER.matcher(title);
|
|
if (matcher.matches()) {
|
|
title = matcher.group(1);
|
|
}
|
|
|
|
String outputFile = AsciiDoctor.mapInFileToOutFile(
|
|
inputFile, inExt, outExt);
|
|
FileReader reader = new FileReader(file);
|
|
Document doc = new Document();
|
|
doc.add(new TextField(Constants.DOC_FIELD, reader));
|
|
doc.add(new StringField(
|
|
Constants.URL_FIELD, prefix + outputFile, Field.Store.YES));
|
|
doc.add(new TextField(Constants.TITLE_FIELD, title, Field.Store.YES));
|
|
iwriter.addDocument(doc);
|
|
reader.close();
|
|
}
|
|
iwriter.close();
|
|
return directory;
|
|
}
|
|
|
|
private byte[] zip(RAMDirectory dir) throws IOException {
|
|
ByteArrayOutputStream buf = new ByteArrayOutputStream();
|
|
ZipOutputStream zip = new ZipOutputStream(buf);
|
|
|
|
for (String name : dir.listAll()) {
|
|
IndexInput in = dir.openInput(name, null);
|
|
try {
|
|
int len = (int) in.length();
|
|
byte[] tmp = new byte[len];
|
|
ZipEntry entry = new ZipEntry(name);
|
|
entry.setSize(len);
|
|
in.readBytes(tmp, 0, len);
|
|
zip.putNextEntry(entry);
|
|
zip.write(tmp, 0, len);
|
|
zip.closeEntry();
|
|
} finally {
|
|
in.close();
|
|
}
|
|
}
|
|
|
|
zip.close();
|
|
return buf.toByteArray();
|
|
}
|
|
|
|
public static void main(String[] args) {
|
|
try {
|
|
new DocIndexer().invoke(args);
|
|
} catch (IOException e) {
|
|
System.err.println(e.getMessage());
|
|
System.exit(1);
|
|
}
|
|
}
|
|
}
|