Files
gerrit/lib/asciidoctor/java/DocIndexer.java
Yuxuan 'fishy' Wang c5b0af0c77 Index documentation using lucene.
We'll also index the documentation while generating it.

This CL will just store and pack the index files, but won't use them.
Another CL will provide the search UI to use the index files.

Index files will be stored in the .index/ directory.

Change-Id: I3f2fa01088f94aaa2e449b3df6c018895df2c5a8
2013-09-27 09:31:32 -07:00

112 lines
3.7 KiB
Java

// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipOutputStream;
import com.google.common.io.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
 * Command-line tool that builds a Lucene index over a set of documentation
 * source files and packs the resulting index files into a zip archive.
 *
 * <p>Each input file becomes one Lucene document with two fields: the
 * tokenized file content ({@code doc}, not stored) and the URL of the
 * rendered page ({@code url}, stored), which is {@code prefix} followed by
 * the output file name computed by {@code AsciiDoctor.mapInFileToOutFile}.
 */
public class DocIndexer {
  private static final Version LUCENE_VERSION = Version.LUCENE_43;

  /** Name of the index field holding the tokenized document text. */
  private static final String DOC_FIELD = "doc";

  /** Name of the stored index field holding the document's URL. */
  private static final String URL_FIELD = "url";

  /** Charset used to decode input files; assumes the doc sources are UTF-8. */
  private static final String INPUT_CHARSET = "UTF-8";

  @Option(name = "-z", usage = "output zip file")
  private String zipFile;

  @Option(name = "--prefix", usage = "prefix for the html filepath")
  private String prefix = "";

  @Option(name = "--in-ext", usage = "extension for input files")
  private String inExt = ".txt";

  @Option(name = "--out-ext", usage = "extension for output files")
  private String outExt = ".html";

  @Argument(usage = "input files")
  private List<String> inputFiles = new ArrayList<String>();

  /**
   * Parses the command line, indexes every input file into a temporary
   * directory, and zips that directory into the file given by {@code -z}.
   *
   * <p>On a command-line error (no input files, or no {@code -z} option) the
   * message and usage are printed to stderr and the JVM exits with status 1.
   *
   * @param parameters raw command-line arguments
   * @throws IOException if reading an input file, writing the index, or
   *     writing the zip file fails
   */
  private void invoke(String... parameters) throws IOException {
    CmdLineParser parser = new CmdLineParser(this);
    try {
      parser.parseArgument(parameters);
      if (inputFiles.isEmpty()) {
        throw new CmdLineException(parser, "FAILED: input file missing");
      }
      // Fail fast: without this check a missing -z only surfaces as a
      // NullPointerException after all the indexing work has been done.
      if (zipFile == null) {
        throw new CmdLineException(parser, "FAILED: output zip file missing");
      }
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      parser.printUsage(System.err);
      System.exit(1);
      return;
    }

    File tmp = Files.createTempDir();
    try {
      buildIndex(tmp);
      zipIndex(tmp);
    } finally {
      // The index only needs to live until it has been zipped.
      deleteRecursively(tmp);
    }
  }

  /** Creates a fresh Lucene index in {@code indexDir} covering all input files. */
  private void buildIndex(File indexDir) throws IOException {
    NIOFSDirectory directory = new NIOFSDirectory(indexDir);
    try {
      // An empty stop-word set keeps every token in the index.
      IndexWriterConfig config = new IndexWriterConfig(
          LUCENE_VERSION,
          new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
      config.setOpenMode(OpenMode.CREATE);
      IndexWriter iwriter = new IndexWriter(directory, config);
      try {
        for (String inputFile : inputFiles) {
          addDocument(iwriter, inputFile);
        }
      } finally {
        // Always close so the writer's files and lock are released even
        // when indexing one of the inputs fails.
        iwriter.close();
      }
    } finally {
      directory.close();
    }
  }

  /** Adds one input file to the index as a document with text and URL fields. */
  private void addDocument(IndexWriter iwriter, String inputFile)
      throws IOException {
    String outputFile = AsciiDoctor.mapInFileToOutFile(
        inputFile, inExt, outExt);
    // Decode explicitly instead of relying on the platform default charset,
    // so the index content does not depend on the build machine's locale.
    InputStreamReader reader = new InputStreamReader(
        new FileInputStream(new File(inputFile)), INPUT_CHARSET);
    try {
      Document doc = new Document();
      doc.add(new TextField(DOC_FIELD, reader));
      doc.add(new StringField(
          URL_FIELD, prefix + outputFile, Field.Store.YES));
      iwriter.addDocument(doc);
    } finally {
      reader.close();
    }
  }

  /** Packs the contents of {@code indexDir} into the zip file named by {@code -z}. */
  private void zipIndex(File indexDir) throws IOException {
    ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
    try {
      AsciiDoctor.zipDir(indexDir, "", zip);
    } finally {
      zip.close();
    }
  }

  /**
   * Deletes {@code file} and, if it is a directory, everything beneath it.
   * Cleanup is best-effort: a failed delete is reported on stderr but does
   * not fail the run.
   */
  private static void deleteRecursively(File file) {
    File[] children = file.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteRecursively(child);
      }
    }
    if (!file.delete()) {
      System.err.println("warning: could not delete " + file);
    }
  }

  /**
   * Entry point. Exits with status 1 if indexing or zipping fails with an
   * {@link IOException}.
   *
   * @param args command-line arguments; see the option and argument fields
   */
  public static void main(String[] args) {
    try {
      new DocIndexer().invoke(args);
    } catch (IOException e) {
      System.err.println(e.getMessage());
      System.exit(1);
    }
  }
}