From a42c0bb09a50f881634ebd14b90cc4c453d863c6 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Wed, 8 Jan 2014 13:06:01 -0800 Subject: [PATCH] Simplify documentation indexer Write the index entirely in RAM. It's only a few hundred KiB, which trivially fits in memory. Compress it twice in memory, once to build the ZIP that is unpacked at runtime, and again to package it into a JAR for linking with the runtime. This saves a build step in the BUCK rules. Move the ZIP under the server package name, to reduce any risk of collision with another concept of "index.zip". Change-Id: I74e59712e9855ac79c5220ff0a6b30ecbc3d152f --- Documentation/BUCK | 13 +--- .../server/documentation/Constants.java | 2 + .../QueryDocumentationExecutor.java | 6 +- lib/asciidoctor/java/DocIndexer.java | 67 +++++++++++++------ 4 files changed, 51 insertions(+), 37 deletions(-) diff --git a/Documentation/BUCK b/Documentation/BUCK index b94d12340f..9c2aea86c6 100644 --- a/Documentation/BUCK +++ b/Documentation/BUCK @@ -62,8 +62,7 @@ python_binary( genrule( name = 'index', cmd = '$(exe //lib/asciidoctor:doc_indexer) ' + - '-z $OUT ' + - '--tmp $TMP ' + + '-o $OUT ' + '--prefix "%s/" ' % DOC_DIR + '--in-ext ".txt" ' + '--out-ext ".html" ' + @@ -73,20 +72,12 @@ genrule( ':licenses.txt', '//lib/asciidoctor:doc_indexer', ], - out = 'index.zip', -) - -genrule( - name = 'index_jar', - cmd = 'jar cf $OUT -C $SRCDIR index.zip', - srcs = [genfile('index.zip')], - deps = [':index'], out = 'index.jar', ) prebuilt_jar( name = 'index_lib', binary_jar = genfile('index.jar'), - deps = [':index_jar'], + deps = [':index'], visibility = ['PUBLIC'], ) diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/documentation/Constants.java b/gerrit-server/src/main/java/com/google/gerrit/server/documentation/Constants.java index bfa2de28ac..388c1d8052 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/documentation/Constants.java +++ 
b/gerrit-server/src/main/java/com/google/gerrit/server/documentation/Constants.java @@ -15,6 +15,8 @@ package com.google.gerrit.server.documentation; public class Constants { + public static final String PACKAGE = "com/google/gerrit/server/documentation"; + public static final String INDEX_ZIP = "index.zip"; public static final String DOC_FIELD = "doc"; public static final String TITLE_FIELD = "title"; diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/documentation/QueryDocumentationExecutor.java b/gerrit-server/src/main/java/com/google/gerrit/server/documentation/QueryDocumentationExecutor.java index 4f6cc3c02c..05027e280b 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/documentation/QueryDocumentationExecutor.java +++ b/gerrit-server/src/main/java/com/google/gerrit/server/documentation/QueryDocumentationExecutor.java @@ -44,7 +44,6 @@ public class QueryDocumentationExecutor { private static final Logger log = LoggerFactory.getLogger(QueryDocumentationExecutor.class); - private static final String INDEX_PATH = "index.zip"; private static final Version LUCENE_VERSION = Version.LUCENE_46; private IndexSearcher searcher; @@ -107,13 +106,12 @@ public class QueryDocumentationExecutor { protected Directory readIndexDirectory() throws IOException { Directory dir = new RAMDirectory(); byte[] buffer = new byte[4096]; - InputStream index = - QueryDocumentationExecutor.class.getClassLoader() - .getResourceAsStream(INDEX_PATH); + InputStream index = getClass().getResourceAsStream(Constants.INDEX_ZIP); if (index == null) { log.warn("No index available"); return null; } + ZipInputStream zip = new ZipInputStream(index); try { ZipEntry entry; diff --git a/lib/asciidoctor/java/DocIndexer.java b/lib/asciidoctor/java/DocIndexer.java index 6cb9d62a9f..0cb785c762 100644 --- a/lib/asciidoctor/java/DocIndexer.java +++ b/lib/asciidoctor/java/DocIndexer.java @@ -23,7 +23,8 @@ import org.apache.lucene.document.TextField; import 
org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.kohsuke.args4j.Argument; import org.kohsuke.args4j.CmdLineException; @@ -31,27 +32,30 @@ import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.Option; import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; +import java.util.jar.JarEntry; +import java.util.jar.JarOutputStream; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; public class DocIndexer { private static final Version LUCENE_VERSION = Version.LUCENE_46; private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)"); - @Option(name = "-z", usage = "output zip file") - private String zipFile; - - @Option(name = "--tmp", usage = "temporary output path") - private File tmpdir; + @Option(name = "-o", usage = "output JAR file") + private String outFile; @Option(name = "--prefix", usage = "prefix for the html filepath") private String prefix = ""; @@ -79,7 +83,20 @@ public class DocIndexer { return; } - NIOFSDirectory directory = new NIOFSDirectory(tmpdir); + byte[] compressedIndex = zip(index()); + JarOutputStream jar = new JarOutputStream(new FileOutputStream(outFile)); + JarEntry entry = new JarEntry( + String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP)); + entry.setSize(compressedIndex.length); + jar.putNextEntry(entry); + 
jar.write(compressedIndex); + jar.closeEntry(); + jar.close(); + } + + private RAMDirectory index() throws IOException, + UnsupportedEncodingException, FileNotFoundException { + RAMDirectory directory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig( LUCENE_VERSION, new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET)); @@ -114,25 +131,31 @@ public class DocIndexer { reader.close(); } iwriter.close(); - - ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile)); - zipDir(tmpdir, "", zip); - zip.close(); + return directory; } - private static void zipDir(File dir, String prefix, ZipOutputStream zip) - throws IOException { - for (File file : dir.listFiles()) { - String name = file.getName(); - if (!prefix.isEmpty()) { - name = prefix + "/" + name; - } - if (file.isDirectory()) { - zipDir(file, name, zip); - } else { - AsciiDoctor.zipFile(file, name, zip); + private byte[] zip(RAMDirectory dir) throws IOException { + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + ZipOutputStream zip = new ZipOutputStream(buf); + + for (String name : dir.listAll()) { + IndexInput in = dir.openInput(name, null); + try { + int len = (int) in.length(); + byte[] tmp = new byte[len]; + ZipEntry entry = new ZipEntry(name); + entry.setSize(len); + in.readBytes(tmp, 0, len); + zip.putNextEntry(entry); + zip.write(tmp, 0, len); + zip.closeEntry(); + } finally { + in.close(); } } + + zip.close(); + return buf.toByteArray(); } public static void main(String[] args) {