Index documentation using Lucene.

We'll also index the documentation while generating it. This CL only stores and packs the index files; it does not use them yet. Another CL will provide the search UI that uses the index files. Index files will be stored in the .index/ directory. Change-Id: I3f2fa01088f94aaa2e449b3df6c018895df2c5a8
2013-09-25 16:59:58 -07:00 · 2013-09-25 16:59:58 -07:00 · c5b0af0c77
commit c5b0af0c77
parent 01bf9abf38
4 changed files with 201 additions and 29 deletions
--- a/Documentation/BUCK
+++ b/Documentation/BUCK
@ -2,30 +2,36 @@ include_defs('//Documentation/asciidoc.defs')
 include_defs('//Documentation/config.defs')
 include_defs('//tools/git.defs')

+DOC_DIR = 'Documentation'
+INDEX_DIR = DOC_DIR + '/.index'
 MAIN = ['//gerrit-pgm:pgm', '//gerrit-gwtui:ui_module']
 SRCS = glob(['*.txt'], excludes = ['licenses.txt'])

 genrule(
  name = 'html',
  cmd = 'cd $TMP;' +
-    'mkdir -p Documentation/images;' +
-    'unzip -q $SRCDIR/only_html.zip -d Documentation/;' +
-    'for s in $SRCS;do ln -s $s Documentation;done;' +
-    'mv Documentation/*.{jpg,png} Documentation/images;' +
-    'rm Documentation/only_html.zip;' +
-    'rm Documentation/licenses.txt;' +
+    'mkdir -p %s/images;' % DOC_DIR +
+    'unzip -q $SRCDIR/index.zip -d %s/;' % INDEX_DIR +
+    'unzip -q $SRCDIR/only_html.zip -d %s/;' % DOC_DIR +
+    'for s in $SRCS;do ln -s $s %s;done;' % DOC_DIR +
+    'mv %s/*.{jpg,png} %s/images;' % (DOC_DIR, DOC_DIR) +
+    'rm %s/only_html.zip;' % DOC_DIR +
+    'rm %s/index.zip;' % DOC_DIR +
+    'rm %s/licenses.txt;' % DOC_DIR +
    'cp $SRCDIR/licenses.txt LICENSES.txt;' +
    'zip -qr $OUT *',
-  srcs = [genfile('only_html.zip')] +
-    glob([
+  srcs = glob([
      'images/*.jpg',
      'images/*.png',
    ]) + [
-    'doc.css',
-    genfile('licenses.txt'),
-  ],
+      'doc.css',
+      genfile('licenses.txt'),
+      genfile('only_html.zip'),
+      genfile('index.zip'),
+    ],
  deps = [
    ':generate_html',
+    ':index',
    ':licenses.txt',
  ],
  out = 'html.zip',
@ -57,3 +63,19 @@ python_binary(
  name = 'replace_macros',
  main = 'replace_macros.py',
 )
+
+# Builds index.zip by running the doc_indexer binary over the documentation
+# sources: the globbed *.txt files in SRCS (which excludes licenses.txt) plus
+# the generated licenses.txt. The flags pass DOC_DIR + "/" as the URL prefix
+# and ask the indexer to map the ".txt" input extension to ".html".
+genrule(
+  name = 'index',
+  cmd = '$(exe //lib/asciidoctor:doc_indexer) ' +
+      '-z $OUT ' +
+      '--prefix "%s/" ' % DOC_DIR +
+      '--in-ext ".txt" ' +
+      '--out-ext ".html" ' +
+      '$SRCS',
+  srcs = SRCS + [genfile('licenses.txt')],
+  deps = [
+    ':licenses.txt',
+    '//lib/asciidoctor:doc_indexer',
+  ],
+  out = 'index.zip',
+)
--- a/lib/asciidoctor/BUCK
+++ b/lib/asciidoctor/BUCK
@ -2,18 +2,38 @@ include_defs('//lib/maven.defs')

 java_binary(
  name = 'asciidoc',
-  main_class = 'Main',
-  deps = [':main_lib'],
+  main_class = 'AsciiDoctor',
+  deps = [':asciidoc_lib'],
  visibility = ['PUBLIC'],
 )

 java_library(
-  name = 'main_lib',
-  srcs = ['java/Main.java'],
+  name = 'asciidoc_lib',
+  srcs = ['java/AsciiDoctor.java'],
  deps = [
    ':asciidoctor',
    ':jruby',
    '//lib:args4j',
+    '//lib:guava',
+  ],
+)
+
+# Runnable documentation indexer whose entry point is the DocIndexer class.
+# Publicly visible so that rules in other packages (for example the 'index'
+# genrule under //Documentation) can run it via $(exe ...).
+java_binary(
+  name = 'doc_indexer',
+  main_class = 'DocIndexer',
+  deps = [':doc_indexer_lib'],
+  visibility = ['PUBLIC'],
+)
+
+# Library holding DocIndexer.java. It depends on :asciidoc_lib because
+# DocIndexer reuses the static helpers AsciiDoctor.mapInFileToOutFile and
+# AsciiDoctor.zipDir, and on Lucene for building the index itself.
+java_library(
+  name = 'doc_indexer_lib',
+  srcs = ['java/DocIndexer.java'],
+  deps = [
+    ':asciidoc_lib',
+    '//lib:args4j',
+    '//lib:guava',
+    '//lib/lucene:analyzers-common',
+    '//lib/lucene:core',
  ],
 )

--- a/lib/asciidoctor/java/AsciiDoctor.java
+++ b/lib/asciidoctor/java/AsciiDoctor.java
@ -15,6 +15,7 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
+import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@ -23,6 +24,8 @@ import java.util.Map;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;

+import com.google.common.io.ByteStreams;
+
 import org.asciidoctor.Asciidoctor;
 import org.asciidoctor.AttributesBuilder;
 import org.asciidoctor.Options;
@ -34,9 +37,8 @@ import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;

-public class Main {
+public class AsciiDoctor {

-  private static final int BUFSIZ = 4096;
  private static final String DOCTYPE = "article";
  private static final String ERUBY = "erb";

@ -59,7 +61,8 @@ public class Main {
  @Argument(usage = "input files")
  private List<String> inputFiles = new ArrayList<String>();

-  private String mapInFileToOutFile(String inFile) {
+  public static String mapInFileToOutFile(
+      String inFile, String inExt, String outExt) {
    String basename = new File(inFile).getName();
    if (basename.endsWith(inExt)) {
      basename = basename.substring(0, basename.length() - inExt.length());
@ -124,25 +127,41 @@ public class Main {
    }

    ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
-    byte[] buf = new byte[BUFSIZ];
    for (String inputFile : inputFiles) {
      File tmp = File.createTempFile("doc", ".html");
      Options options = createOptions(tmp);
      renderInput(options, inputFile);

-      FileInputStream input = new FileInputStream(tmp);
-      int len;
-      zip.putNextEntry(new ZipEntry(mapInFileToOutFile(inputFile)));
-      while ((len = input.read(buf)) > 0) {
-        zip.write(buf, 0, len);
-      }
-      input.close();
-      tmp.delete();
-      zip.closeEntry();
+      String outputFile = mapInFileToOutFile(inputFile, inExt, outExt);
+      zipFile(tmp, outputFile, zip);
    }
    zip.close();
  }

+  /**
+   * Recursively adds every file below {@code dir} to {@code zip}.
+   *
+   * @param dir directory whose contents are archived
+   * @param prefix entry-name prefix for the children of {@code dir}; when
+   *     empty, children are stored under their bare file names
+   * @param zip open archive to append to; this method does not close it
+   * @throws IOException if {@code dir} cannot be listed, or if a file cannot
+   *     be read or written to the archive
+   */
+  public static void zipDir(File dir, String prefix, ZipOutputStream zip)
+      throws IOException {
+    File[] children = dir.listFiles();
+    if (children == null) {
+      // listFiles() returns null (not an empty array) when dir is not a
+      // directory or cannot be read; report that instead of failing with a
+      // NullPointerException in the loop below.
+      throw new IOException("Cannot list directory: " + dir);
+    }
+    for (File file : children) {
+      String name = file.getName();
+      if (!prefix.isEmpty()) {
+        name = prefix + "/" + name;
+      }
+      if (file.isDirectory()) {
+        zipDir(file, name, zip);
+      } else {
+        zipFile(file, name, zip);
+      }
+    }
+  }
+
+  /**
+   * Adds the contents of {@code file} to {@code zip} as an entry called
+   * {@code name}.
+   *
+   * @param file file whose bytes are copied into the archive
+   * @param name entry name to use inside the archive
+   * @param zip open archive to append to; this method does not close it
+   * @throws IOException if the file cannot be read or the entry cannot be
+   *     written
+   */
+  public static void zipFile(File file, String name, ZipOutputStream zip)
+      throws IOException {
+    zip.putNextEntry(new ZipEntry(name));
+    FileInputStream input = new FileInputStream(file);
+    try {
+      ByteStreams.copy(input, zip);
+    } finally {
+      // Release the file handle even when the copy fails part-way through.
+      input.close();
+    }
+    zip.closeEntry();
+  }
+
  private void renderInput(Options options, String inputFile) {
    Asciidoctor asciidoctor = JRubyAsciidoctor.create();
    asciidoctor.renderFile(new File(inputFile), options);
@ -150,7 +169,7 @@ public class Main {

  public static void main(String[] args) {
    try {
-      new Main().invoke(args);
+      new AsciiDoctor().invoke(args);
    } catch (IOException e) {
      System.err.println(e.getMessage());
      System.exit(1);
--- a/lib/asciidoctor/java/DocIndexer.java
+++ b/lib/asciidoctor/java/DocIndexer.java
@ -0,0 +1,111 @@
+// Copyright (C) 2013 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.ZipOutputStream;
+
+import com.google.common.io.Files;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.lucene.util.Version;
+
+import org.kohsuke.args4j.Argument;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+/**
+ * Command-line tool that builds a Lucene index over documentation source
+ * files and packs the resulting index directory into a zip file.
+ *
+ * <p>Each input file becomes one Lucene document with two fields: the file's
+ * text (indexed, not stored) and the URL of the corresponding output page
+ * (stored), built from {@code --prefix} plus the mapped output file name.
+ */
+public class DocIndexer {
+
+  private static final Version LUCENE_VERSION = Version.LUCENE_43;
+  private static final String DOC_FIELD = "doc";
+  private static final String URL_FIELD = "url";
+
+  @Option(name = "-z", usage = "output zip file")
+  private String zipFile;
+
+  @Option(name = "--prefix", usage = "prefix for the html filepath")
+  private String prefix = "";
+
+  @Option(name = "--in-ext", usage = "extension for input files")
+  private String inExt = ".txt";
+
+  @Option(name = "--out-ext", usage = "extension for output files")
+  private String outExt = ".html";
+
+  @Argument(usage = "input files")
+  private List<String> inputFiles = new ArrayList<String>();
+
+  /**
+   * Parses the command line, indexes all input files into a temporary
+   * directory and zips that directory to the file given with {@code -z}.
+   *
+   * <p>On a command-line error the usage is printed and the JVM exits with
+   * status 1.
+   *
+   * @param parameters raw command-line arguments
+   * @throws IOException if reading an input, writing the index or writing the
+   *     zip file fails
+   */
+  private void invoke(String... parameters) throws IOException {
+    CmdLineParser parser = new CmdLineParser(this);
+    try {
+      parser.parseArgument(parameters);
+      if (zipFile == null) {
+        // Without this check a missing -z surfaces later as a
+        // NullPointerException from FileOutputStream.
+        throw new CmdLineException(parser, "FAILED: output zip file missing");
+      }
+      if (inputFiles.isEmpty()) {
+        throw new CmdLineException(parser, "FAILED: input file missing");
+      }
+    } catch (CmdLineException e) {
+      System.err.println(e.getMessage());
+      parser.printUsage(System.err);
+      System.exit(1);
+      return;
+    }
+
+    File tmp = Files.createTempDir();
+    try {
+      writeIndex(tmp);
+      ZipOutputStream zip =
+          new ZipOutputStream(new FileOutputStream(zipFile));
+      try {
+        AsciiDoctor.zipDir(tmp, "", zip);
+      } finally {
+        zip.close();
+      }
+    } finally {
+      // The index only needs to survive until it has been zipped.
+      deleteRecursively(tmp);
+    }
+  }
+
+  /** Creates a fresh Lucene index in {@code indexDir} from all input files. */
+  private void writeIndex(File indexDir) throws IOException {
+    NIOFSDirectory directory = new NIOFSDirectory(indexDir);
+    try {
+      IndexWriterConfig config = new IndexWriterConfig(
+          LUCENE_VERSION,
+          new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
+      config.setOpenMode(OpenMode.CREATE);
+      IndexWriter iwriter = new IndexWriter(directory, config);
+      try {
+        for (String inputFile : inputFiles) {
+          addDocument(iwriter, inputFile);
+        }
+      } finally {
+        iwriter.close();
+      }
+    } finally {
+      directory.close();
+    }
+  }
+
+  /** Adds one input file to the index as a single Lucene document. */
+  private void addDocument(IndexWriter iwriter, String inputFile)
+      throws IOException {
+    String outputFile = AsciiDoctor.mapInFileToOutFile(
+        inputFile, inExt, outExt);
+    // NOTE(review): FileReader decodes with the platform default charset;
+    // confirm the documentation encoding and pin it if builds must be
+    // reproducible across machines.
+    FileReader reader = new FileReader(new File(inputFile));
+    try {
+      Document doc = new Document();
+      doc.add(new TextField(DOC_FIELD, reader));
+      doc.add(new StringField(
+          URL_FIELD, prefix + outputFile, Field.Store.YES));
+      iwriter.addDocument(doc);
+    } finally {
+      reader.close();
+    }
+  }
+
+  /**
+   * Best-effort removal of {@code file} and everything below it. Failures
+   * are ignored because the index has already been packed (or the run has
+   * already failed) by the time this is called.
+   */
+  private static void deleteRecursively(File file) {
+    File[] children = file.listFiles();
+    if (children != null) {
+      for (File child : children) {
+        deleteRecursively(child);
+      }
+    }
+    file.delete();
+  }
+
+  public static void main(String[] args) {
+    try {
+      new DocIndexer().invoke(args);
+    } catch (IOException e) {
+      System.err.println(e.getMessage());
+      System.exit(1);
+    }
+  }
+}