Merge "Index documentation using lucene."

This commit is contained in:
Shawn Pearce
2013-09-27 16:32:16 +00:00
committed by Gerrit Code Review
4 changed files with 201 additions and 29 deletions

View File

@@ -2,30 +2,36 @@ include_defs('//Documentation/asciidoc.defs')
include_defs('//Documentation/config.defs')
include_defs('//tools/git.defs')
DOC_DIR = 'Documentation'
INDEX_DIR = DOC_DIR + '/.index'
MAIN = ['//gerrit-pgm:pgm', '//gerrit-gwtui:ui_module']
SRCS = glob(['*.txt'], excludes = ['licenses.txt'])
genrule(
name = 'html',
cmd = 'cd $TMP;' +
'mkdir -p Documentation/images;' +
'unzip -q $SRCDIR/only_html.zip -d Documentation/;' +
'for s in $SRCS;do ln -s $s Documentation;done;' +
'mv Documentation/*.{jpg,png} Documentation/images;' +
'rm Documentation/only_html.zip;' +
'rm Documentation/licenses.txt;' +
'mkdir -p %s/images;' % DOC_DIR +
'unzip -q $SRCDIR/index.zip -d %s/;' % INDEX_DIR +
'unzip -q $SRCDIR/only_html.zip -d %s/;' % DOC_DIR +
'for s in $SRCS;do ln -s $s %s;done;' % DOC_DIR +
'mv %s/*.{jpg,png} %s/images;' % (DOC_DIR, DOC_DIR) +
'rm %s/only_html.zip;' % DOC_DIR +
'rm %s/index.zip;' % DOC_DIR +
'rm %s/licenses.txt;' % DOC_DIR +
'cp $SRCDIR/licenses.txt LICENSES.txt;' +
'zip -qr $OUT *',
srcs = [genfile('only_html.zip')] +
glob([
srcs = glob([
'images/*.jpg',
'images/*.png',
]) + [
'doc.css',
genfile('licenses.txt'),
],
'doc.css',
genfile('licenses.txt'),
genfile('only_html.zip'),
genfile('index.zip'),
],
deps = [
':generate_html',
':index',
':licenses.txt',
],
out = 'html.zip',
@@ -57,3 +63,19 @@ python_binary(
name = 'replace_macros',
main = 'replace_macros.py',
)
# Builds index.zip by running the doc_indexer tool over the documentation
# sources (including the generated licenses.txt). Indexed URLs are prefixed
# with DOC_DIR and use the .html extension in place of .txt.
genrule(
  name = 'index',
  cmd = ' '.join([
    '$(exe //lib/asciidoctor:doc_indexer)',
    '-z $OUT',
    '--prefix "%s/"' % DOC_DIR,
    '--in-ext ".txt"',
    '--out-ext ".html"',
    '$SRCS',
  ]),
  srcs = SRCS + [genfile('licenses.txt')],
  deps = [
    ':licenses.txt',
    '//lib/asciidoctor:doc_indexer',
  ],
  out = 'index.zip',
)

View File

@@ -2,18 +2,38 @@ include_defs('//lib/maven.defs')
java_binary(
name = 'asciidoc',
main_class = 'Main',
deps = [':main_lib'],
main_class = 'AsciiDoctor',
deps = [':asciidoc_lib'],
visibility = ['PUBLIC'],
)
java_library(
name = 'main_lib',
srcs = ['java/Main.java'],
name = 'asciidoc_lib',
srcs = ['java/AsciiDoctor.java'],
deps = [
':asciidoctor',
':jruby',
'//lib:args4j',
'//lib:guava',
],
)
# Runnable documentation indexer whose entry point is the DocIndexer class.
# Declared PUBLIC because the Documentation build invokes it through
# $(exe //lib/asciidoctor:doc_indexer).
java_binary(
name = 'doc_indexer',
main_class = 'DocIndexer',
deps = [':doc_indexer_lib'],
visibility = ['PUBLIC'],
)
# Compiles java/DocIndexer.java. It depends on :asciidoc_lib because
# DocIndexer reuses AsciiDoctor.mapInFileToOutFile and AsciiDoctor.zipDir,
# and on Lucene (core + analyzers-common) for building the index.
java_library(
name = 'doc_indexer_lib',
srcs = ['java/DocIndexer.java'],
deps = [
':asciidoc_lib',
'//lib:args4j',
'//lib:guava',
'//lib/lucene:analyzers-common',
'//lib/lucene:core',
],
)

View File

@@ -15,6 +15,7 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -23,6 +24,8 @@ import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import com.google.common.io.ByteStreams;
import org.asciidoctor.Asciidoctor;
import org.asciidoctor.AttributesBuilder;
import org.asciidoctor.Options;
@@ -34,9 +37,8 @@ import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
public class Main {
public class AsciiDoctor {
private static final int BUFSIZ = 4096;
private static final String DOCTYPE = "article";
private static final String ERUBY = "erb";
@@ -59,7 +61,8 @@ public class Main {
@Argument(usage = "input files")
private List<String> inputFiles = new ArrayList<String>();
private String mapInFileToOutFile(String inFile) {
public static String mapInFileToOutFile(
String inFile, String inExt, String outExt) {
String basename = new File(inFile).getName();
if (basename.endsWith(inExt)) {
basename = basename.substring(0, basename.length() - inExt.length());
@@ -124,25 +127,41 @@ public class Main {
}
ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
byte[] buf = new byte[BUFSIZ];
for (String inputFile : inputFiles) {
File tmp = File.createTempFile("doc", ".html");
Options options = createOptions(tmp);
renderInput(options, inputFile);
FileInputStream input = new FileInputStream(tmp);
int len;
zip.putNextEntry(new ZipEntry(mapInFileToOutFile(inputFile)));
while ((len = input.read(buf)) > 0) {
zip.write(buf, 0, len);
}
input.close();
tmp.delete();
zip.closeEntry();
String outputFile = mapInFileToOutFile(inputFile, inExt, outExt);
zipFile(tmp, outputFile, zip);
}
zip.close();
}
/**
 * Recursively adds every file under a directory to a zip stream.
 *
 * <p>Entry names are built from {@code prefix} and each file name joined by
 * {@code "/"}; when {@code prefix} is empty the bare file name is used.
 * Sub-directories are descended into with their own name as the new prefix;
 * no entry is written for a directory itself.
 *
 * @param dir directory whose contents are added.
 * @param prefix entry-name prefix, or the empty string for none.
 * @param zip destination zip stream; it is left open.
 * @throws IOException if {@code dir} cannot be listed or a file cannot be
 *     written to the zip.
 */
public static void zipDir(File dir, String prefix, ZipOutputStream zip)
    throws IOException {
  // listFiles() returns null when dir is not a directory or cannot be read;
  // report that as an I/O failure instead of a NullPointerException.
  File[] children = dir.listFiles();
  if (children == null) {
    throw new IOException("Cannot list directory: " + dir);
  }
  for (File file : children) {
    String name = file.getName();
    if (!prefix.isEmpty()) {
      name = prefix + "/" + name;
    }
    if (file.isDirectory()) {
      zipDir(file, name, zip);
    } else {
      zipFile(file, name, zip);
    }
  }
}
/**
 * Writes a single file into a zip stream as one entry.
 *
 * @param file file whose bytes are copied into the entry.
 * @param name name of the zip entry to create.
 * @param zip destination zip stream; it is left open.
 * @throws IOException if the file cannot be read or the entry cannot be
 *     written.
 */
public static void zipFile(File file, String name, ZipOutputStream zip)
    throws IOException {
  zip.putNextEntry(new ZipEntry(name));
  FileInputStream input = new FileInputStream(file);
  try {
    ByteStreams.copy(input, zip);
  } finally {
    // Release the file handle even when the copy fails part-way.
    input.close();
  }
  zip.closeEntry();
}
private void renderInput(Options options, String inputFile) {
Asciidoctor asciidoctor = JRubyAsciidoctor.create();
asciidoctor.renderFile(new File(inputFile), options);
@@ -150,7 +169,7 @@ public class Main {
public static void main(String[] args) {
try {
new Main().invoke(args);
new AsciiDoctor().invoke(args);
} catch (IOException e) {
System.err.println(e.getMessage());
System.exit(1);

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipOutputStream;
import com.google.common.io.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
 * Command-line tool that builds a Lucene index over documentation source
 * files and packs the resulting index directory into a zip file.
 *
 * <p>Every input file becomes one Lucene document with two fields: the file
 * content, analyzed into the {@code "doc"} field, and the stored
 * {@code "url"} field holding {@code prefix} followed by the output file name
 * derived via {@link AsciiDoctor#mapInFileToOutFile}.
 */
public class DocIndexer {
  private static final Version LUCENE_VERSION = Version.LUCENE_43;
  private static final String DOC_FIELD = "doc";
  private static final String URL_FIELD = "url";

  // Read sources with a fixed charset so the index content does not depend
  // on the platform default of the machine running the build.
  // NOTE(review): assumes the documentation sources are UTF-8 - confirm.
  private static final Charset SOURCE_CHARSET = Charset.forName("UTF-8");

  @Option(name = "-z", usage = "output zip file")
  private String zipFile;

  @Option(name = "--prefix", usage = "prefix for the html filepath")
  private String prefix = "";

  @Option(name = "--in-ext", usage = "extension for input files")
  private String inExt = ".txt";

  @Option(name = "--out-ext", usage = "extension for output files")
  private String outExt = ".html";

  @Argument(usage = "input files")
  private List<String> inputFiles = new ArrayList<String>();

  /**
   * Parses the command line, indexes all input files into a temporary
   * directory and zips that directory into the file given by {@code -z}.
   *
   * <p>On a usage error the message and usage text are printed to stderr and
   * the JVM exits with status 1.
   *
   * @param parameters raw command-line arguments.
   * @throws IOException if reading an input, writing the index or writing the
   *     zip fails.
   */
  private void invoke(String... parameters) throws IOException {
    CmdLineParser parser = new CmdLineParser(this);
    try {
      parser.parseArgument(parameters);
      if (inputFiles.isEmpty()) {
        throw new CmdLineException(parser, "FAILED: input file missing");
      }
      // Without -z the FileOutputStream constructor below would fail with a
      // NullPointerException; report it as a usage error instead.
      if (zipFile == null) {
        throw new CmdLineException(parser, "FAILED: output zip file missing");
      }
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      parser.printUsage(System.err);
      System.exit(1);
      return;
    }

    File tmp = Files.createTempDir();
    try {
      writeIndex(tmp);
      ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
      try {
        AsciiDoctor.zipDir(tmp, "", zip);
      } finally {
        zip.close();
      }
    } finally {
      // The index only needs to survive until it has been zipped.
      deleteRecursively(tmp);
    }
  }

  /** Creates a fresh Lucene index in {@code indexDir} from all input files. */
  private void writeIndex(File indexDir) throws IOException {
    NIOFSDirectory directory = new NIOFSDirectory(indexDir);
    try {
      // An empty stop-word set is passed so that no words are dropped.
      IndexWriterConfig config = new IndexWriterConfig(
          LUCENE_VERSION,
          new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
      config.setOpenMode(OpenMode.CREATE);
      IndexWriter iwriter = new IndexWriter(directory, config);
      try {
        for (String inputFile : inputFiles) {
          addDocument(iwriter, inputFile);
        }
      } finally {
        iwriter.close();
      }
    } finally {
      directory.close();
    }
  }

  /** Adds one input file to the index as a single document. */
  private void addDocument(IndexWriter iwriter, String inputFile)
      throws IOException {
    String outputFile = AsciiDoctor.mapInFileToOutFile(
        inputFile, inExt, outExt);
    Reader reader = new InputStreamReader(
        new FileInputStream(new File(inputFile)), SOURCE_CHARSET);
    try {
      Document doc = new Document();
      doc.add(new TextField(DOC_FIELD, reader));
      doc.add(new StringField(
          URL_FIELD, prefix + outputFile, Field.Store.YES));
      iwriter.addDocument(doc);
    } finally {
      // The reader is consumed by addDocument; close it on every path.
      reader.close();
    }
  }

  /** Best-effort removal of a file or directory tree; failures are ignored. */
  private static void deleteRecursively(File file) {
    File[] children = file.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteRecursively(child);
      }
    }
    file.delete();
  }

  /**
   * Program entry point; prints the message of any I/O failure to stderr and
   * exits with status 1.
   */
  public static void main(String[] args) {
    try {
      new DocIndexer().invoke(args);
    } catch (IOException e) {
      System.err.println(e.getMessage());
      System.exit(1);
    }
  }
}