Merge "Index documentation using lucene."

This commit is contained in:
Shawn Pearce
2013-09-27 16:32:16 +00:00
committed by Gerrit Code Review
4 changed files with 201 additions and 29 deletions

View File

@@ -2,30 +2,36 @@ include_defs('//Documentation/asciidoc.defs')
include_defs('//Documentation/config.defs') include_defs('//Documentation/config.defs')
include_defs('//tools/git.defs') include_defs('//tools/git.defs')
DOC_DIR = 'Documentation'
INDEX_DIR = DOC_DIR + '/.index'
MAIN = ['//gerrit-pgm:pgm', '//gerrit-gwtui:ui_module'] MAIN = ['//gerrit-pgm:pgm', '//gerrit-gwtui:ui_module']
SRCS = glob(['*.txt'], excludes = ['licenses.txt']) SRCS = glob(['*.txt'], excludes = ['licenses.txt'])
genrule( genrule(
name = 'html', name = 'html',
cmd = 'cd $TMP;' + cmd = 'cd $TMP;' +
'mkdir -p Documentation/images;' + 'mkdir -p %s/images;' % DOC_DIR +
'unzip -q $SRCDIR/only_html.zip -d Documentation/;' + 'unzip -q $SRCDIR/index.zip -d %s/;' % INDEX_DIR +
'for s in $SRCS;do ln -s $s Documentation;done;' + 'unzip -q $SRCDIR/only_html.zip -d %s/;' % DOC_DIR +
'mv Documentation/*.{jpg,png} Documentation/images;' + 'for s in $SRCS;do ln -s $s %s;done;' % DOC_DIR +
'rm Documentation/only_html.zip;' + 'mv %s/*.{jpg,png} %s/images;' % (DOC_DIR, DOC_DIR) +
'rm Documentation/licenses.txt;' + 'rm %s/only_html.zip;' % DOC_DIR +
'rm %s/index.zip;' % DOC_DIR +
'rm %s/licenses.txt;' % DOC_DIR +
'cp $SRCDIR/licenses.txt LICENSES.txt;' + 'cp $SRCDIR/licenses.txt LICENSES.txt;' +
'zip -qr $OUT *', 'zip -qr $OUT *',
srcs = [genfile('only_html.zip')] + srcs = glob([
glob([
'images/*.jpg', 'images/*.jpg',
'images/*.png', 'images/*.png',
]) + [ ]) + [
'doc.css', 'doc.css',
genfile('licenses.txt'), genfile('licenses.txt'),
], genfile('only_html.zip'),
genfile('index.zip'),
],
deps = [ deps = [
':generate_html', ':generate_html',
':index',
':licenses.txt', ':licenses.txt',
], ],
out = 'html.zip', out = 'html.zip',
@@ -57,3 +63,19 @@ python_binary(
name = 'replace_macros', name = 'replace_macros',
main = 'replace_macros.py', main = 'replace_macros.py',
) )
# Builds index.zip by running the doc_indexer binary over the documentation
# sources (SRCS plus the generated licenses.txt).
genrule(
name = 'index',
cmd = '$(exe //lib/asciidoctor:doc_indexer) ' +
# -z names the zip file the indexer writes; here it is the rule's output.
'-z $OUT ' +
# The prefix is DOC_DIR followed by a slash, so it is passed as
# "Documentation/" given DOC_DIR above.
'--prefix "%s/" ' % DOC_DIR +
# Input and output extensions handed to the indexer (.txt -> .html).
'--in-ext ".txt" ' +
'--out-ext ".html" ' +
'$SRCS',
srcs = SRCS + [genfile('licenses.txt')],
deps = [
':licenses.txt',
'//lib/asciidoctor:doc_indexer',
],
out = 'index.zip',
)

View File

@@ -2,18 +2,38 @@ include_defs('//lib/maven.defs')
java_binary( java_binary(
name = 'asciidoc', name = 'asciidoc',
main_class = 'Main', main_class = 'AsciiDoctor',
deps = [':main_lib'], deps = [':asciidoc_lib'],
visibility = ['PUBLIC'], visibility = ['PUBLIC'],
) )
java_library( java_library(
name = 'main_lib', name = 'asciidoc_lib',
srcs = ['java/Main.java'], srcs = ['java/AsciiDoctor.java'],
deps = [ deps = [
':asciidoctor', ':asciidoctor',
':jruby', ':jruby',
'//lib:args4j', '//lib:args4j',
'//lib:guava',
],
)
# Executable wrapper around the DocIndexer main class; publicly visible so
# rules in other packages can run it (the Documentation 'index' genrule
# invokes //lib/asciidoctor:doc_indexer via $(exe ...)).
java_binary(
name = 'doc_indexer',
main_class = 'DocIndexer',
deps = [':doc_indexer_lib'],
visibility = ['PUBLIC'],
)
java_library(
name = 'doc_indexer_lib',
srcs = ['java/DocIndexer.java'],
deps = [
':asciidoc_lib',
'//lib:args4j',
'//lib:guava',
'//lib/lucene:analyzers-common',
'//lib/lucene:core',
], ],
) )

View File

@@ -15,6 +15,7 @@
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@@ -23,6 +24,8 @@ import java.util.Map;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream; import java.util.zip.ZipOutputStream;
import com.google.common.io.ByteStreams;
import org.asciidoctor.Asciidoctor; import org.asciidoctor.Asciidoctor;
import org.asciidoctor.AttributesBuilder; import org.asciidoctor.AttributesBuilder;
import org.asciidoctor.Options; import org.asciidoctor.Options;
@@ -34,9 +37,8 @@ import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option; import org.kohsuke.args4j.Option;
public class Main { public class AsciiDoctor {
private static final int BUFSIZ = 4096;
private static final String DOCTYPE = "article"; private static final String DOCTYPE = "article";
private static final String ERUBY = "erb"; private static final String ERUBY = "erb";
@@ -59,7 +61,8 @@ public class Main {
@Argument(usage = "input files") @Argument(usage = "input files")
private List<String> inputFiles = new ArrayList<String>(); private List<String> inputFiles = new ArrayList<String>();
private String mapInFileToOutFile(String inFile) { public static String mapInFileToOutFile(
String inFile, String inExt, String outExt) {
String basename = new File(inFile).getName(); String basename = new File(inFile).getName();
if (basename.endsWith(inExt)) { if (basename.endsWith(inExt)) {
basename = basename.substring(0, basename.length() - inExt.length()); basename = basename.substring(0, basename.length() - inExt.length());
@@ -124,25 +127,41 @@ public class Main {
} }
ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile)); ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
byte[] buf = new byte[BUFSIZ];
for (String inputFile : inputFiles) { for (String inputFile : inputFiles) {
File tmp = File.createTempFile("doc", ".html"); File tmp = File.createTempFile("doc", ".html");
Options options = createOptions(tmp); Options options = createOptions(tmp);
renderInput(options, inputFile); renderInput(options, inputFile);
FileInputStream input = new FileInputStream(tmp); String outputFile = mapInFileToOutFile(inputFile, inExt, outExt);
int len; zipFile(tmp, outputFile, zip);
zip.putNextEntry(new ZipEntry(mapInFileToOutFile(inputFile)));
while ((len = input.read(buf)) > 0) {
zip.write(buf, 0, len);
}
input.close();
tmp.delete();
zip.closeEntry();
} }
zip.close(); zip.close();
} }
/**
 * Recursively adds the contents of a directory to a zip archive.
 *
 * <p>Each regular file becomes one zip entry. Entry names are the file's
 * path relative to {@code dir}, joined with "/" and preceded by
 * {@code prefix} when it is non-empty.
 *
 * @param dir directory whose contents are added.
 * @param prefix entry-name prefix for everything under {@code dir}; pass an
 *        empty string to place the entries at the root of the archive.
 * @param zip archive to append to; it is not closed by this method.
 * @throws IOException if {@code dir} cannot be listed or a file cannot be
 *         written to the archive.
 */
public static void zipDir(File dir, String prefix, ZipOutputStream zip)
    throws IOException {
  // listFiles() returns null (rather than throwing) when the path is not a
  // directory or an I/O error occurs; report that instead of hitting a
  // NullPointerException in the loop below.
  File[] files = dir.listFiles();
  if (files == null) {
    throw new IOException("Cannot list directory: " + dir);
  }
  for (File file : files) {
    String name = file.getName();
    if (!prefix.isEmpty()) {
      name = prefix + "/" + name;
    }
    if (file.isDirectory()) {
      zipDir(file, name, zip);
    } else {
      zipFile(file, name, zip);
    }
  }
}
/**
 * Adds a single file to a zip archive under the given entry name.
 *
 * @param file file whose bytes are copied into the archive.
 * @param name name of the zip entry to create.
 * @param zip archive to append to; it is not closed by this method.
 * @throws IOException if the file cannot be read or the entry cannot be
 *         written.
 */
public static void zipFile(File file, String name, ZipOutputStream zip)
    throws IOException {
  zip.putNextEntry(new ZipEntry(name));
  FileInputStream input = new FileInputStream(file);
  try {
    ByteStreams.copy(input, zip);
  } finally {
    // Release the file handle even when the copy fails part-way.
    input.close();
  }
  zip.closeEntry();
}
private void renderInput(Options options, String inputFile) { private void renderInput(Options options, String inputFile) {
Asciidoctor asciidoctor = JRubyAsciidoctor.create(); Asciidoctor asciidoctor = JRubyAsciidoctor.create();
asciidoctor.renderFile(new File(inputFile), options); asciidoctor.renderFile(new File(inputFile), options);
@@ -150,7 +169,7 @@ public class Main {
public static void main(String[] args) { public static void main(String[] args) {
try { try {
new Main().invoke(args); new AsciiDoctor().invoke(args);
} catch (IOException e) { } catch (IOException e) {
System.err.println(e.getMessage()); System.err.println(e.getMessage());
System.exit(1); System.exit(1);

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipOutputStream;

import com.google.common.io.Files;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
 * Command line tool that builds a Lucene index over a set of documentation
 * source files and packages the index directory into a zip file.
 *
 * <p>Each input file becomes one Lucene document with two fields: the file's
 * text (tokenized, not stored) and the URL of the rendered page, which is
 * {@code --prefix} followed by the input file's base name with its extension
 * mapped from {@code --in-ext} to {@code --out-ext}.
 */
public class DocIndexer {
  private static final Version LUCENE_VERSION = Version.LUCENE_43;
  private static final String DOC_FIELD = "doc";
  private static final String URL_FIELD = "url";

  // Decode the sources explicitly instead of relying on the platform default
  // charset, so the generated index does not depend on the build host locale.
  // NOTE(review): assumes the documentation sources are UTF-8 - confirm.
  private static final String INPUT_CHARSET = "UTF-8";

  @Option(name = "-z", usage = "output zip file")
  private String zipFile;

  @Option(name = "--prefix", usage = "prefix for the html filepath")
  private String prefix = "";

  @Option(name = "--in-ext", usage = "extension for input files")
  private String inExt = ".txt";

  @Option(name = "--out-ext", usage = "extension for output files")
  private String outExt = ".html";

  @Argument(usage = "input files")
  private List<String> inputFiles = new ArrayList<String>();

  /**
   * Parses the command line, indexes every input file and writes the zipped
   * index to the file named by {@code -z}.
   *
   * <p>On a usage error the message and usage text are printed to stderr and
   * the JVM exits with status 1.
   *
   * @param parameters raw command line arguments.
   * @throws IOException if reading an input, writing the index or writing the
   *         zip file fails.
   */
  private void invoke(String... parameters) throws IOException {
    CmdLineParser parser = new CmdLineParser(this);
    try {
      parser.parseArgument(parameters);
      // Without this check a missing -z surfaces later as a
      // NullPointerException from FileOutputStream.
      if (zipFile == null) {
        throw new CmdLineException(parser, "FAILED: output zip file missing");
      }
      if (inputFiles.isEmpty()) {
        throw new CmdLineException(parser, "FAILED: input file missing");
      }
    } catch (CmdLineException e) {
      System.err.println(e.getMessage());
      parser.printUsage(System.err);
      System.exit(1);
      return;
    }

    File tmp = Files.createTempDir();
    try {
      writeIndex(tmp);
      zipIndex(tmp);
    } finally {
      // The index only needs to live until it has been zipped.
      deleteRecursively(tmp);
    }
  }

  /** Creates a fresh Lucene index in {@code indexDir} covering all inputs. */
  private void writeIndex(File indexDir) throws IOException {
    NIOFSDirectory directory = new NIOFSDirectory(indexDir);
    try {
      // An empty stop-word set is passed so the analyzer filters no words.
      IndexWriterConfig config = new IndexWriterConfig(
          LUCENE_VERSION,
          new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
      config.setOpenMode(OpenMode.CREATE);
      IndexWriter iwriter = new IndexWriter(directory, config);
      try {
        for (String inputFile : inputFiles) {
          addDocument(iwriter, inputFile);
        }
      } finally {
        iwriter.close();
      }
    } finally {
      directory.close();
    }
  }

  /** Adds one input file to the index as a (text, url) document. */
  private void addDocument(IndexWriter iwriter, String inputFile)
      throws IOException {
    String outputFile = AsciiDoctor.mapInFileToOutFile(
        inputFile, inExt, outExt);
    Reader reader = new InputStreamReader(
        new FileInputStream(new File(inputFile)), INPUT_CHARSET);
    try {
      Document doc = new Document();
      doc.add(new TextField(DOC_FIELD, reader));
      doc.add(new StringField(
          URL_FIELD, prefix + outputFile, Field.Store.YES));
      iwriter.addDocument(doc);
    } finally {
      reader.close();
    }
  }

  /** Packs the contents of {@code indexDir} into the output zip file. */
  private void zipIndex(File indexDir) throws IOException {
    ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
    try {
      AsciiDoctor.zipDir(indexDir, "", zip);
    } finally {
      zip.close();
    }
  }

  /** Best-effort removal of a file or directory tree. */
  private static void deleteRecursively(File file) {
    File[] children = file.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteRecursively(child);
      }
    }
    // Result intentionally ignored: a leftover temp file must not fail the
    // run or mask an exception already propagating from the caller.
    file.delete();
  }

  public static void main(String[] args) {
    try {
      new DocIndexer().invoke(args);
    } catch (IOException e) {
      System.err.println(e.getMessage());
      System.exit(1);
    }
  }
}