9ac0746423
Version 4.10.2 includes a couple of bug fixes [1], but these are not necessarily needed by Gerrit. This update is needed because Elasticsearch 1.4.0 has a dependency on this version [2]. There is no change in the index format since 4.10.1 so we reuse the same version number for the Lucene backed change index. [1] http://lucene.apache.org/core/4_10_2/changes/Changes.html#v4.10.2.bug_fixes [2] http://mvnrepository.com/artifact/org.elasticsearch/elasticsearch/1.4.0 Change-Id: Ie47dd2238493da44659c73c7f97f0caa2f7fdfa4
174 lines
5.8 KiB
Java
174 lines
5.8 KiB
Java
// Copyright (C) 2013 The Android Open Source Project
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
import com.google.gerrit.server.documentation.Constants;
|
|
|
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
import org.apache.lucene.analysis.util.CharArraySet;
|
|
import org.apache.lucene.document.Document;
|
|
import org.apache.lucene.document.Field;
|
|
import org.apache.lucene.document.StringField;
|
|
import org.apache.lucene.document.TextField;
|
|
import org.apache.lucene.index.IndexWriter;
|
|
import org.apache.lucene.index.IndexWriterConfig;
|
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
import org.apache.lucene.store.IndexInput;
|
|
import org.apache.lucene.store.RAMDirectory;
|
|
import org.apache.lucene.util.Version;
|
|
import org.kohsuke.args4j.Argument;
|
|
import org.kohsuke.args4j.CmdLineException;
|
|
import org.kohsuke.args4j.CmdLineParser;
|
|
import org.kohsuke.args4j.Option;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileNotFoundException;
|
|
import java.io.FileOutputStream;
|
|
import java.io.FileReader;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.jar.JarEntry;
|
|
import java.util.jar.JarOutputStream;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
public class DocIndexer {
|
|
@SuppressWarnings("deprecation")
|
|
private static final Version LUCENE_VERSION = Version.LUCENE_4_10_1;
|
|
private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)");
|
|
|
|
@Option(name = "-o", usage = "output JAR file")
|
|
private String outFile;
|
|
|
|
@Option(name = "--prefix", usage = "prefix for the html filepath")
|
|
private String prefix = "";
|
|
|
|
@Option(name = "--in-ext", usage = "extension for input files")
|
|
private String inExt = ".txt";
|
|
|
|
@Option(name = "--out-ext", usage = "extension for output files")
|
|
private String outExt = ".html";
|
|
|
|
@Argument(usage = "input files")
|
|
private List<String> inputFiles = new ArrayList<>();
|
|
|
|
private void invoke(String... parameters) throws IOException {
|
|
CmdLineParser parser = new CmdLineParser(this);
|
|
try {
|
|
parser.parseArgument(parameters);
|
|
if (inputFiles.isEmpty()) {
|
|
throw new CmdLineException(parser, "FAILED: input file missing");
|
|
}
|
|
} catch (CmdLineException e) {
|
|
System.err.println(e.getMessage());
|
|
parser.printUsage(System.err);
|
|
System.exit(1);
|
|
return;
|
|
}
|
|
|
|
byte[] compressedIndex = zip(index());
|
|
JarOutputStream jar = new JarOutputStream(new FileOutputStream(outFile));
|
|
JarEntry entry = new JarEntry(
|
|
String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP));
|
|
entry.setSize(compressedIndex.length);
|
|
jar.putNextEntry(entry);
|
|
jar.write(compressedIndex);
|
|
jar.closeEntry();
|
|
jar.close();
|
|
}
|
|
|
|
private RAMDirectory index() throws IOException,
|
|
UnsupportedEncodingException, FileNotFoundException {
|
|
RAMDirectory directory = new RAMDirectory();
|
|
IndexWriterConfig config = new IndexWriterConfig(
|
|
LUCENE_VERSION,
|
|
new StandardAnalyzer(CharArraySet.EMPTY_SET));
|
|
config.setOpenMode(OpenMode.CREATE);
|
|
IndexWriter iwriter = new IndexWriter(directory, config);
|
|
for (String inputFile : inputFiles) {
|
|
File file = new File(inputFile);
|
|
if (file.length() == 0) {
|
|
continue;
|
|
}
|
|
|
|
BufferedReader titleReader = new BufferedReader(
|
|
new InputStreamReader(new FileInputStream(file), "UTF-8"));
|
|
String title = titleReader.readLine();
|
|
if (title != null && title.startsWith("[[")) {
|
|
// Generally the first line of the txt is the title. In a few cases the
|
|
// first line is a "[[tag]]" and the second line is the title.
|
|
title = titleReader.readLine();
|
|
}
|
|
titleReader.close();
|
|
Matcher matcher = SECTION_HEADER.matcher(title);
|
|
if (matcher.matches()) {
|
|
title = matcher.group(1);
|
|
}
|
|
|
|
String outputFile = AsciiDoctor.mapInFileToOutFile(
|
|
inputFile, inExt, outExt);
|
|
FileReader reader = new FileReader(file);
|
|
Document doc = new Document();
|
|
doc.add(new TextField(Constants.DOC_FIELD, reader));
|
|
doc.add(new StringField(
|
|
Constants.URL_FIELD, prefix + outputFile, Field.Store.YES));
|
|
doc.add(new TextField(Constants.TITLE_FIELD, title, Field.Store.YES));
|
|
iwriter.addDocument(doc);
|
|
reader.close();
|
|
}
|
|
iwriter.close();
|
|
return directory;
|
|
}
|
|
|
|
private byte[] zip(RAMDirectory dir) throws IOException {
|
|
ByteArrayOutputStream buf = new ByteArrayOutputStream();
|
|
ZipOutputStream zip = new ZipOutputStream(buf);
|
|
|
|
for (String name : dir.listAll()) {
|
|
IndexInput in = dir.openInput(name, null);
|
|
try {
|
|
int len = (int) in.length();
|
|
byte[] tmp = new byte[len];
|
|
ZipEntry entry = new ZipEntry(name);
|
|
entry.setSize(len);
|
|
in.readBytes(tmp, 0, len);
|
|
zip.putNextEntry(entry);
|
|
zip.write(tmp, 0, len);
|
|
zip.closeEntry();
|
|
} finally {
|
|
in.close();
|
|
}
|
|
}
|
|
|
|
zip.close();
|
|
return buf.toByteArray();
|
|
}
|
|
|
|
public static void main(String[] args) {
|
|
try {
|
|
new DocIndexer().invoke(args);
|
|
} catch (IOException e) {
|
|
System.err.println(e.getMessage());
|
|
System.exit(1);
|
|
}
|
|
}
|
|
}
|