gerrit/lib/asciidoctor/java/DocIndexer.java

// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipOutputStream;

import com.google.common.io.Files;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;

import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

public class DocIndexer {
  private static final Version LUCENE_VERSION = Version.LUCENE_44;
  private static final String DOC_FIELD = "doc";
  private static final String URL_FIELD = "url";
  private static final String TITLE_FIELD = "title";

  @Option(name = "-z", usage = "output zip file")
  private String zipFile;

  @Option(name = "--prefix", usage = "prefix for the html filepath")
  private String prefix = "";

  @Option(name = "--in-ext", usage = "extension for input files")
  private String inExt = ".txt";

  @Option(name = "--out-ext", usage = "extension for output files")
  private String outExt = ".html";

  @Argument(usage = "input files")
  private List<String> inputFiles = new ArrayList<String>();

  private void invoke(String... parameters) throws IOException {
    CmdLineParser parser = new CmdLineParser(this);
    try {
      parser.parseArgument(parameters);
      if (inputFiles.isEmpty()) {
        throw new CmdLineException(parser, "FAILED: input file missing");
      }
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      parser.printUsage(System.err);
      System.exit(1);
      return;
    }

    File tmp = Files.createTempDir();
    NIOFSDirectory directory = new NIOFSDirectory(tmp);
    IndexWriterConfig config = new IndexWriterConfig(
        LUCENE_VERSION,
        new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
    config.setOpenMode(OpenMode.CREATE);
    IndexWriter iwriter = new IndexWriter(directory, config);
    for (String inputFile : inputFiles) {
      File file = new File(inputFile);

      BufferedReader titleReader = new BufferedReader(
          new InputStreamReader(new FileInputStream(file), "UTF-8"));
      String title = titleReader.readLine();
      if (title.startsWith("[[")) {
        // Generally the first line of the txt is the title. In a few cases the
        // first line is a "[[tag]]" and the second line is the title.
        title = titleReader.readLine();
      }
      titleReader.close();

      String outputFile = AsciiDoctor.mapInFileToOutFile(
          inputFile, inExt, outExt);
      FileReader reader = new FileReader(file);
      Document doc = new Document();
      doc.add(new TextField(DOC_FIELD, reader));
      doc.add(new StringField(
            URL_FIELD, prefix + outputFile, Field.Store.YES));
      doc.add(new TextField(TITLE_FIELD, title, Field.Store.YES));
      iwriter.addDocument(doc);
      reader.close();
    }
    iwriter.close();

    ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
    AsciiDoctor.zipDir(tmp, "", zip);
    zip.close();
  }

  public static void main(String[] args) {
    try {
      new DocIndexer().invoke(args);
    } catch (IOException e) {
      System.err.println(e.getMessage());
      System.exit(1);
    }
  }
}