Simplify documentation indexer

Write the index entirely in RAM. It's only a few hundred KiB, which trivially fits in memory. Compress it twice in memory, once to build the ZIP that is unpacked at runtime, and again to package it into a JAR for linking with the runtime. This saves a build step in the BUCK rules. Move the ZIP under the server package name, to reduce any risk of collision with another concept of "index.zip". Change-Id: I74e59712e9855ac79c5220ff0a6b30ecbc3d152f
2014-01-08 13:06:01 -08:00
parent a427d9596e
commit a42c0bb09a
4 changed files with 51 additions and 37 deletions
--- a/Documentation/BUCK
+++ b/Documentation/BUCK
@@ -62,8 +62,7 @@ python_binary(
 genrule(
  name = 'index',
  cmd = '$(exe //lib/asciidoctor:doc_indexer) ' +
-      '-z $OUT ' +
-      '--tmp $TMP ' +
+      '-o $OUT ' +
      '--prefix "%s/" ' % DOC_DIR +
      '--in-ext ".txt" ' +
      '--out-ext ".html" ' +
@@ -73,20 +72,12 @@ genrule(
    ':licenses.txt',
    '//lib/asciidoctor:doc_indexer',
  ],
-  out = 'index.zip',
-)
-
-genrule(
-  name = 'index_jar',
-  cmd = 'jar cf $OUT -C $SRCDIR index.zip',
-  srcs = [genfile('index.zip')],
-  deps = [':index'],
  out = 'index.jar',
 )

 prebuilt_jar(
  name = 'index_lib',
  binary_jar = genfile('index.jar'),
-  deps = [':index_jar'],
+  deps = [':index'],
  visibility = ['PUBLIC'],
 )
--- a/gerrit-server/src/main/java/com/google/gerrit/server/documentation/Constants.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/documentation/Constants.java
@@ -15,6 +15,8 @@
 package com.google.gerrit.server.documentation;

 public class Constants {
+  public static final String PACKAGE = "com/google/gerrit/server/documentation";
+  public static final String INDEX_ZIP = "index.zip";

  public static final String DOC_FIELD = "doc";
  public static final String TITLE_FIELD = "title";
--- a/gerrit-server/src/main/java/com/google/gerrit/server/documentation/QueryDocumentationExecutor.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/documentation/QueryDocumentationExecutor.java
@@ -44,7 +44,6 @@ public class QueryDocumentationExecutor {
  private static final Logger log =
      LoggerFactory.getLogger(QueryDocumentationExecutor.class);

-  private static final String INDEX_PATH = "index.zip";
  private static final Version LUCENE_VERSION = Version.LUCENE_46;

  private IndexSearcher searcher;
@@ -107,13 +106,12 @@ public class QueryDocumentationExecutor {
  protected Directory readIndexDirectory() throws IOException {
    Directory dir = new RAMDirectory();
    byte[] buffer = new byte[4096];
-    InputStream index =
-        QueryDocumentationExecutor.class.getClassLoader()
-            .getResourceAsStream(INDEX_PATH);
+    InputStream index = getClass().getResourceAsStream(Constants.INDEX_ZIP);
    if (index == null) {
      log.warn("No index available");
      return null;
    }
+
    ZipInputStream zip = new ZipInputStream(index);
    try {
      ZipEntry entry;
--- a/lib/asciidoctor/java/DocIndexer.java
+++ b/lib/asciidoctor/java/DocIndexer.java
@@ -23,7 +23,8 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Version;
 import org.kohsuke.args4j.Argument;
 import org.kohsuke.args4j.CmdLineException;
@@ -31,27 +32,30 @@ import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;

 import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.jar.JarEntry;
+import java.util.jar.JarOutputStream;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;

 public class DocIndexer {
  private static final Version LUCENE_VERSION = Version.LUCENE_46;
  private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)");

-  @Option(name = "-z", usage = "output zip file")
-  private String zipFile;
-
-  @Option(name = "--tmp", usage = "temporary output path")
-  private File tmpdir;
+  @Option(name = "-o", usage = "output JAR file")
+  private String outFile;

  @Option(name = "--prefix", usage = "prefix for the html filepath")
  private String prefix = "";
@@ -79,7 +83,20 @@ public class DocIndexer {
      return;
    }

-    NIOFSDirectory directory = new NIOFSDirectory(tmpdir);
+    byte[] compressedIndex = zip(index());
+    JarOutputStream jar = new JarOutputStream(new FileOutputStream(outFile));
+    JarEntry entry = new JarEntry(
+        String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP));
+    entry.setSize(compressedIndex.length);
+    jar.putNextEntry(entry);
+    jar.write(compressedIndex);
+    jar.closeEntry();
+    jar.close();
+  }
+
+  private RAMDirectory index() throws IOException,
+      UnsupportedEncodingException, FileNotFoundException {
+    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(
        LUCENE_VERSION,
        new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
@@ -114,25 +131,31 @@ public class DocIndexer {
      reader.close();
    }
    iwriter.close();
-
-    ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
-    zipDir(tmpdir, "", zip);
-    zip.close();
+    return directory;
  }

-  private static void zipDir(File dir, String prefix, ZipOutputStream zip)
-      throws IOException {
-    for (File file : dir.listFiles()) {
-      String name = file.getName();
-      if (!prefix.isEmpty()) {
-        name = prefix + "/" + name;
-      }
-      if (file.isDirectory()) {
-        zipDir(file, name, zip);
-      } else {
-        AsciiDoctor.zipFile(file, name, zip);
+  private byte[] zip(RAMDirectory dir) throws IOException {
+    ByteArrayOutputStream buf = new ByteArrayOutputStream();
+    ZipOutputStream zip = new ZipOutputStream(buf);
+
+    for (String name : dir.listAll()) {
+      IndexInput in = dir.openInput(name, null);
+      try {
+        int len = (int) in.length();
+        byte[] tmp = new byte[len];
+        ZipEntry entry = new ZipEntry(name);
+        entry.setSize(len);
+        in.readBytes(tmp, 0, len);
+        zip.putNextEntry(entry);
+        zip.write(tmp, 0, len);
+        zip.closeEntry();
+      } finally {
+        in.close();
      }
    }
+
+    zip.close();
+    return buf.toByteArray();
  }

  public static void main(String[] args) {