Simplify documentation indexer
Write the index entirely in RAM. It's only a few hundred KiB, which trivially fits in memory. Compress it twice in memory, once to build the ZIP that is unpacked at runtime, and again to package it into a JAR for linking with the runtime. This saves a build step in the BUCK rules. Move the ZIP under the server package name, to reduce any risk of collision with another concept of "index.zip". Change-Id: I74e59712e9855ac79c5220ff0a6b30ecbc3d152f
This commit is contained in:
@@ -62,8 +62,7 @@ python_binary(
|
|||||||
genrule(
|
genrule(
|
||||||
name = 'index',
|
name = 'index',
|
||||||
cmd = '$(exe //lib/asciidoctor:doc_indexer) ' +
|
cmd = '$(exe //lib/asciidoctor:doc_indexer) ' +
|
||||||
'-z $OUT ' +
|
'-o $OUT ' +
|
||||||
'--tmp $TMP ' +
|
|
||||||
'--prefix "%s/" ' % DOC_DIR +
|
'--prefix "%s/" ' % DOC_DIR +
|
||||||
'--in-ext ".txt" ' +
|
'--in-ext ".txt" ' +
|
||||||
'--out-ext ".html" ' +
|
'--out-ext ".html" ' +
|
||||||
@@ -73,20 +72,12 @@ genrule(
|
|||||||
':licenses.txt',
|
':licenses.txt',
|
||||||
'//lib/asciidoctor:doc_indexer',
|
'//lib/asciidoctor:doc_indexer',
|
||||||
],
|
],
|
||||||
out = 'index.zip',
|
|
||||||
)
|
|
||||||
|
|
||||||
genrule(
|
|
||||||
name = 'index_jar',
|
|
||||||
cmd = 'jar cf $OUT -C $SRCDIR index.zip',
|
|
||||||
srcs = [genfile('index.zip')],
|
|
||||||
deps = [':index'],
|
|
||||||
out = 'index.jar',
|
out = 'index.jar',
|
||||||
)
|
)
|
||||||
|
|
||||||
prebuilt_jar(
|
prebuilt_jar(
|
||||||
name = 'index_lib',
|
name = 'index_lib',
|
||||||
binary_jar = genfile('index.jar'),
|
binary_jar = genfile('index.jar'),
|
||||||
deps = [':index_jar'],
|
deps = [':index'],
|
||||||
visibility = ['PUBLIC'],
|
visibility = ['PUBLIC'],
|
||||||
)
|
)
|
||||||
|
@@ -15,6 +15,8 @@
|
|||||||
package com.google.gerrit.server.documentation;
|
package com.google.gerrit.server.documentation;
|
||||||
|
|
||||||
public class Constants {
|
public class Constants {
|
||||||
|
public static final String PACKAGE = "com/google/gerrit/server/documentation";
|
||||||
|
public static final String INDEX_ZIP = "index.zip";
|
||||||
|
|
||||||
public static final String DOC_FIELD = "doc";
|
public static final String DOC_FIELD = "doc";
|
||||||
public static final String TITLE_FIELD = "title";
|
public static final String TITLE_FIELD = "title";
|
||||||
|
@@ -44,7 +44,6 @@ public class QueryDocumentationExecutor {
|
|||||||
private static final Logger log =
|
private static final Logger log =
|
||||||
LoggerFactory.getLogger(QueryDocumentationExecutor.class);
|
LoggerFactory.getLogger(QueryDocumentationExecutor.class);
|
||||||
|
|
||||||
private static final String INDEX_PATH = "index.zip";
|
|
||||||
private static final Version LUCENE_VERSION = Version.LUCENE_46;
|
private static final Version LUCENE_VERSION = Version.LUCENE_46;
|
||||||
|
|
||||||
private IndexSearcher searcher;
|
private IndexSearcher searcher;
|
||||||
@@ -107,13 +106,12 @@ public class QueryDocumentationExecutor {
|
|||||||
protected Directory readIndexDirectory() throws IOException {
|
protected Directory readIndexDirectory() throws IOException {
|
||||||
Directory dir = new RAMDirectory();
|
Directory dir = new RAMDirectory();
|
||||||
byte[] buffer = new byte[4096];
|
byte[] buffer = new byte[4096];
|
||||||
InputStream index =
|
InputStream index = getClass().getResourceAsStream(Constants.INDEX_ZIP);
|
||||||
QueryDocumentationExecutor.class.getClassLoader()
|
|
||||||
.getResourceAsStream(INDEX_PATH);
|
|
||||||
if (index == null) {
|
if (index == null) {
|
||||||
log.warn("No index available");
|
log.warn("No index available");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
ZipInputStream zip = new ZipInputStream(index);
|
ZipInputStream zip = new ZipInputStream(index);
|
||||||
try {
|
try {
|
||||||
ZipEntry entry;
|
ZipEntry entry;
|
||||||
|
@@ -23,7 +23,8 @@ import org.apache.lucene.document.TextField;
|
|||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.store.NIOFSDirectory;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.kohsuke.args4j.Argument;
|
import org.kohsuke.args4j.Argument;
|
||||||
import org.kohsuke.args4j.CmdLineException;
|
import org.kohsuke.args4j.CmdLineException;
|
||||||
@@ -31,27 +32,30 @@ import org.kohsuke.args4j.CmdLineParser;
|
|||||||
import org.kohsuke.args4j.Option;
|
import org.kohsuke.args4j.Option;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.jar.JarEntry;
|
||||||
|
import java.util.jar.JarOutputStream;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.zip.ZipEntry;
|
||||||
import java.util.zip.ZipOutputStream;
|
import java.util.zip.ZipOutputStream;
|
||||||
|
|
||||||
public class DocIndexer {
|
public class DocIndexer {
|
||||||
private static final Version LUCENE_VERSION = Version.LUCENE_46;
|
private static final Version LUCENE_VERSION = Version.LUCENE_46;
|
||||||
private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)");
|
private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)");
|
||||||
|
|
||||||
@Option(name = "-z", usage = "output zip file")
|
@Option(name = "-o", usage = "output JAR file")
|
||||||
private String zipFile;
|
private String outFile;
|
||||||
|
|
||||||
@Option(name = "--tmp", usage = "temporary output path")
|
|
||||||
private File tmpdir;
|
|
||||||
|
|
||||||
@Option(name = "--prefix", usage = "prefix for the html filepath")
|
@Option(name = "--prefix", usage = "prefix for the html filepath")
|
||||||
private String prefix = "";
|
private String prefix = "";
|
||||||
@@ -79,7 +83,20 @@ public class DocIndexer {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
NIOFSDirectory directory = new NIOFSDirectory(tmpdir);
|
byte[] compressedIndex = zip(index());
|
||||||
|
JarOutputStream jar = new JarOutputStream(new FileOutputStream(outFile));
|
||||||
|
JarEntry entry = new JarEntry(
|
||||||
|
String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP));
|
||||||
|
entry.setSize(compressedIndex.length);
|
||||||
|
jar.putNextEntry(entry);
|
||||||
|
jar.write(compressedIndex);
|
||||||
|
jar.closeEntry();
|
||||||
|
jar.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private RAMDirectory index() throws IOException,
|
||||||
|
UnsupportedEncodingException, FileNotFoundException {
|
||||||
|
RAMDirectory directory = new RAMDirectory();
|
||||||
IndexWriterConfig config = new IndexWriterConfig(
|
IndexWriterConfig config = new IndexWriterConfig(
|
||||||
LUCENE_VERSION,
|
LUCENE_VERSION,
|
||||||
new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
|
new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
|
||||||
@@ -114,25 +131,31 @@ public class DocIndexer {
|
|||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
iwriter.close();
|
iwriter.close();
|
||||||
|
return directory;
|
||||||
ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
|
|
||||||
zipDir(tmpdir, "", zip);
|
|
||||||
zip.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void zipDir(File dir, String prefix, ZipOutputStream zip)
|
private byte[] zip(RAMDirectory dir) throws IOException {
|
||||||
throws IOException {
|
ByteArrayOutputStream buf = new ByteArrayOutputStream();
|
||||||
for (File file : dir.listFiles()) {
|
ZipOutputStream zip = new ZipOutputStream(buf);
|
||||||
String name = file.getName();
|
|
||||||
if (!prefix.isEmpty()) {
|
for (String name : dir.listAll()) {
|
||||||
name = prefix + "/" + name;
|
IndexInput in = dir.openInput(name, null);
|
||||||
}
|
try {
|
||||||
if (file.isDirectory()) {
|
int len = (int) in.length();
|
||||||
zipDir(file, name, zip);
|
byte[] tmp = new byte[len];
|
||||||
} else {
|
ZipEntry entry = new ZipEntry(name);
|
||||||
AsciiDoctor.zipFile(file, name, zip);
|
entry.setSize(len);
|
||||||
|
in.readBytes(tmp, 0, len);
|
||||||
|
zip.putNextEntry(entry);
|
||||||
|
zip.write(tmp, 0, len);
|
||||||
|
zip.closeEntry();
|
||||||
|
} finally {
|
||||||
|
in.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zip.close();
|
||||||
|
return buf.toByteArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
Reference in New Issue
Block a user