Simplify documentation indexer

Write the index entirely in RAM. It's only a few hundred KiB, which
trivially fits in memory.

Compress it twice in memory, once to build the ZIP that is unpacked
at runtime, and again to package it into a JAR for linking with
the runtime. This saves a build step in the BUCK rules.

Move the ZIP under the server package name, to reduce any risk
of collision with another concept of "index.zip".

Change-Id: I74e59712e9855ac79c5220ff0a6b30ecbc3d152f
This commit is contained in:
Shawn Pearce
2014-01-08 13:06:01 -08:00
parent a427d9596e
commit a42c0bb09a
4 changed files with 51 additions and 37 deletions

View File

@@ -62,8 +62,7 @@ python_binary(
genrule(
name = 'index',
cmd = '$(exe //lib/asciidoctor:doc_indexer) ' +
'-z $OUT ' +
'--tmp $TMP ' +
'-o $OUT ' +
'--prefix "%s/" ' % DOC_DIR +
'--in-ext ".txt" ' +
'--out-ext ".html" ' +
@@ -73,20 +72,12 @@ genrule(
':licenses.txt',
'//lib/asciidoctor:doc_indexer',
],
out = 'index.zip',
)
genrule(
name = 'index_jar',
cmd = 'jar cf $OUT -C $SRCDIR index.zip',
srcs = [genfile('index.zip')],
deps = [':index'],
out = 'index.jar',
)
prebuilt_jar(
name = 'index_lib',
binary_jar = genfile('index.jar'),
deps = [':index_jar'],
deps = [':index'],
visibility = ['PUBLIC'],
)

View File

@@ -15,6 +15,8 @@
package com.google.gerrit.server.documentation;
public class Constants {
public static final String PACKAGE = "com/google/gerrit/server/documentation";
public static final String INDEX_ZIP = "index.zip";
public static final String DOC_FIELD = "doc";
public static final String TITLE_FIELD = "title";

View File

@@ -44,7 +44,6 @@ public class QueryDocumentationExecutor {
private static final Logger log =
LoggerFactory.getLogger(QueryDocumentationExecutor.class);
private static final String INDEX_PATH = "index.zip";
private static final Version LUCENE_VERSION = Version.LUCENE_46;
private IndexSearcher searcher;
@@ -107,13 +106,12 @@ public class QueryDocumentationExecutor {
protected Directory readIndexDirectory() throws IOException {
Directory dir = new RAMDirectory();
byte[] buffer = new byte[4096];
InputStream index =
QueryDocumentationExecutor.class.getClassLoader()
.getResourceAsStream(INDEX_PATH);
InputStream index = getClass().getResourceAsStream(Constants.INDEX_ZIP);
if (index == null) {
log.warn("No index available");
return null;
}
ZipInputStream zip = new ZipInputStream(index);
try {
ZipEntry entry;

View File

@@ -23,7 +23,8 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
@@ -31,27 +32,30 @@ import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
public class DocIndexer {
private static final Version LUCENE_VERSION = Version.LUCENE_46;
private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)");
@Option(name = "-z", usage = "output zip file")
private String zipFile;
@Option(name = "--tmp", usage = "temporary output path")
private File tmpdir;
@Option(name = "-o", usage = "output JAR file")
private String outFile;
@Option(name = "--prefix", usage = "prefix for the html filepath")
private String prefix = "";
@@ -79,7 +83,20 @@ public class DocIndexer {
return;
}
NIOFSDirectory directory = new NIOFSDirectory(tmpdir);
byte[] compressedIndex = zip(index());
JarOutputStream jar = new JarOutputStream(new FileOutputStream(outFile));
JarEntry entry = new JarEntry(
String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP));
entry.setSize(compressedIndex.length);
jar.putNextEntry(entry);
jar.write(compressedIndex);
jar.closeEntry();
jar.close();
}
private RAMDirectory index() throws IOException,
UnsupportedEncodingException, FileNotFoundException {
RAMDirectory directory = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(
LUCENE_VERSION,
new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
@@ -114,25 +131,31 @@ public class DocIndexer {
reader.close();
}
iwriter.close();
ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
zipDir(tmpdir, "", zip);
zip.close();
return directory;
}
private static void zipDir(File dir, String prefix, ZipOutputStream zip)
throws IOException {
for (File file : dir.listFiles()) {
String name = file.getName();
if (!prefix.isEmpty()) {
name = prefix + "/" + name;
}
if (file.isDirectory()) {
zipDir(file, name, zip);
} else {
AsciiDoctor.zipFile(file, name, zip);
/**
 * Packs every file of an in-memory Lucene directory into a ZIP archive.
 *
 * <p>Each file reported by {@code dir.listAll()} is read completely into a
 * byte array and written as one ZIP entry carrying the same name and an
 * explicit uncompressed size. The archive is assembled in a
 * {@link ByteArrayOutputStream}, so nothing is written to disk here.
 *
 * @param dir directory whose files are archived.
 * @return the bytes of the finished ZIP archive.
 * @throws IOException if listing, reading or writing an entry fails.
 */
private byte[] zip(RAMDirectory dir) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  ZipOutputStream archive = new ZipOutputStream(out);
  String[] names = dir.listAll();
  for (int i = 0; i < names.length; i++) {
    String name = names[i];
    IndexInput input = dir.openInput(name, null);
    try {
      // The file length is a long; it is narrowed to int because the whole
      // file is buffered in a single byte array.
      int size = (int) input.length();
      byte[] data = new byte[size];
      ZipEntry item = new ZipEntry(name);
      item.setSize(size);
      input.readBytes(data, 0, size);
      archive.putNextEntry(item);
      archive.write(data, 0, size);
      archive.closeEntry();
    } finally {
      // Release the input even when reading or writing the entry fails.
      input.close();
    }
  }
  // Closing the stream finishes the archive before the buffer is copied out.
  archive.close();
  return out.toByteArray();
}
public static void main(String[] args) {