#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Generate docbook from DocUtils Native (DN) XML files."""

import argparse
import glob
import os
import re
import sys
from xml.sax import saxutils

from lxml import etree

OS_DOC_TOOLS_DIR = os.path.dirname(__file__)
DN2DBK = os.path.join(OS_DOC_TOOLS_DIR, 'resources', 'dn2osdbk.xsl')
XML_NS = '{http://www.w3.org/XML/1998/namespace}'
TRANSFORMERS = {'chapter': 'ChapterTransformer', 'book': 'BookTransformer'}

# Matches an id that consists solely of 'id' followed by digits, i.e. the
# hardly usable secondary ids that sphinx generates ('id1', 'id2', ...).
_GENERATED_ID_RE = re.compile(r'id\d+$')

# Skeleton of the generated index file. %(xml_id)s is substituted with an
# already quoted attribute value, %(title)s with escaped character data.
# NOTE(review): the element markup and namespace declarations were
# reconstructed; confirm them against the expected docbook output.
_INDEX_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>

<!-- WARNING: This file is automatically generated. Do not edit it. -->

<%(tag)s xmlns="http://docbook.org/ns/docbook"
    xmlns:xi="http://www.w3.org/2001/XInclude"
    xmlns:xlink="http://www.w3.org/1999/xlink"
    version="5.0"
    xml:id=%(xml_id)s>
    <title>%(title)s</title>
    %(includes)s
</%(tag)s>
'''


class XMLFileTransformer(object):
    """Transform a single DN XML file to docbook.

    Call the transform() method to generate the docbook output.
    """

    def __init__(self, source_file, toplevel='chapter'):
        """Initialize an instance.

        :param source_file: The path to the source DN XML file.
        :param toplevel: The top level tag of the generated docbook ('book' or
                         'chapter').
        """
        self.source_file = source_file
        self.toplevel = toplevel
        basename = os.path.basename(self.source_file)
        self.basename = os.path.splitext(basename)[0]
        self.xslt_file = DN2DBK

    def _xslt_transform(self, xml):
        """Apply the XSLT stylesheet to ``xml`` and return the result tree.

        :param xml: The parsed DN XML document (element or element tree).
        """
        # Let lxml read the stylesheet itself: it then honours the encoding
        # declaration instead of choking on an already decoded string.
        transform = etree.XSLT(etree.parse(self.xslt_file))
        return transform(xml)

    def _custom_transform(self, tree):
        """Apply the fixes that are not handled by the XSLT stylesheet.

        The root tag is renamed to the requested top level tag, a warning
        comment is inserted, and every xml:id attribute is reduced to a
        single id.

        :param tree: The element tree produced by the XSLT transformation.
        :returns: The same tree, modified in place.
        """
        # Set the correct root tag
        root = tree.getroot()
        root.tag = '{http://docbook.org/ns/docbook}%s' % self.toplevel

        # Add a comment to warn that the file is autogenerated
        comment = etree.Comment("WARNING: This file is automatically "
                                "generated. Do not edit it.")
        root.insert(0, comment)

        id_attrib = '%sid' % XML_NS
        for item in tree.iter():
            # Find tags with an 'id' attribute. In case there are multiple
            # space-separated IDs, we have to make a choice.
            xml_id = item.get(id_attrib)
            if xml_id is None:
                continue

            # split() without argument ignores repeated/trailing whitespace
            # so that no empty id can be selected.
            id_list = xml_id.split()
            if not id_list:
                continue

            # If the title and an associated reference target match, sphinx
            # generates a second id like 'id[0-9]' which is hardly usable.
            # So we take the first id in that case, otherwise the last.
            if len(id_list) > 1 and _GENERATED_ID_RE.match(id_list[-1]):
                xml_id = id_list[0]
            else:
                xml_id = id_list[-1]

            item.attrib[id_attrib] = xml_id

        return tree

    def transform(self):
        """Generate the docbook XML.

        :returns: The serialized document, as UTF-8 encoded bytes including
                  the XML declaration.
        """
        # etree.parse reads raw bytes, so source files that carry an
        # encoding declaration are decoded correctly.
        source_doc = etree.parse(self.source_file).getroot()

        result_tree = self._xslt_transform(source_doc)
        result_tree = self._custom_transform(result_tree)
        return etree.tostring(result_tree, pretty_print=True,
                              xml_declaration=True, encoding="UTF-8")


class BaseFolderTransformer(object):
    """Base class for generating docbook from a DN XML source dir."""

    file_toplevel = 'section'

    def __init__(self, source_dir, output_dir, index='index.xml'):
        """Initialize an instance.

        :param source_dir: The DN XML source directory.
        :param output_dir: The directory in which docbook files will be
                           stored. This directory is created if it doesn't
                           exist.
        :param index: The name of the index file in the source dir.
        """
        self.source_dir = source_dir
        self.output_dir = output_dir
        self.index = index
        self.includes = []
        self.title = None

        self.parse_index()

        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

    def parse_index(self):
        """Generates part of the docbook index from the DN XML index.

        Collects the names of the files to include and the document title.
        The full index is generated in the write_index method.

        :raises ValueError: if the index has no 'section/title' element.
        """
        src = os.path.join(self.source_dir, self.index)
        src_xml = etree.parse(src).getroot()

        for reference in src_xml.iter('reference'):
            refuri = reference.get('refuri')
            # Skip references without a target and references that point
            # inside a document: only whole documents are included.
            if refuri is None or '#' in refuri:
                continue
            self.includes.append("%s_%s.xml" % (self.file_toplevel, refuri))

        title = src_xml.find('section/title')
        if title is None:
            raise ValueError("No 'section/title' element found in %s" % src)
        # itertext() also collects text nested in inline markup.
        self.title = ''.join(title.itertext())

    def _write_index(self, tag):
        """Write index.xml in the output dir with ``tag`` as root element.

        :param tag: The docbook root tag of the index ('book' or 'chapter').
        """
        output_file = os.path.join(self.output_dir, 'index.xml')
        xml_id = self.title.lower().replace(' ', '-')
        includes = "\n    ".join(
            '<xi:include href=%s/>' % saxutils.quoteattr(include)
            for include in self.includes)

        # Titles and file names are escaped so that characters such as '&'
        # or '<' cannot produce a malformed index.
        output = _INDEX_TEMPLATE % {'tag': tag,
                                    'xml_id': saxutils.quoteattr(xml_id),
                                    'title': saxutils.escape(self.title),
                                    'includes': includes}

        # Write bytes to match the UTF-8 encoding announced in the XML
        # declaration, whatever the platform default encoding is.
        with open(output_file, 'wb') as fd:
            fd.write(output.encode('utf-8'))

    def write_index(self):
        """Write the index file.

        This method must be implemented by subclasses.
        """
        raise NotImplementedError

    def transform(self):
        """Perform the actual conversion.

        Every XML file of the source dir except the index is converted and
        written to the output dir, then the index is written.
        """
        files = glob.glob(os.path.join(self.source_dir, '*.xml'))
        for src in sorted(files):
            src_name = os.path.basename(src)
            # Comparing the base name works with any path separator.
            if src_name == self.index:
                continue
            basename = '%s_%s' % (self.file_toplevel, src_name)
            output_file = os.path.join(self.output_dir, basename)
            transformer = XMLFileTransformer(src, self.file_toplevel)
            xml = transformer.transform()
            # The transformer returns encoded bytes, hence the binary mode.
            with open(output_file, 'wb') as fd:
                fd.write(xml)

        self.write_index()


class BookTransformer(BaseFolderTransformer):
    """Create a docbook book."""

    file_toplevel = 'chapter'

    def write_index(self):
        """Write the index file, with 'book' as root element."""
        self._write_index('book')


class ChapterTransformer(BaseFolderTransformer):
    """Create a docbook chapter."""

    file_toplevel = 'section'

    def write_index(self):
        """Write the index file, with 'chapter' as root element."""
        self._write_index('chapter')


def main():
    """Command line entry point: convert a DN XML file or directory."""
    parser = argparse.ArgumentParser(description="Generate docbook from "
                                     "DocUtils Native XML format")
    parser.add_argument('source', help='Source file or directory.')
    parser.add_argument('output', help='Output file or directory.')
    parser.add_argument('--toplevel', help='Toplevel flag.',
                        choices=['book', 'chapter'],
                        default='chapter')
    args = parser.parse_args()

    if os.path.isdir(args.source):
        # TRANSFORMERS maps the toplevel name to a class of this module.
        cls = globals()[TRANSFORMERS[args.toplevel]]
        transformer = cls(args.source, args.output)
        transformer.transform()
    else:
        transformer = XMLFileTransformer(args.source, args.toplevel)
        xml = transformer.transform()
        # transform() returns encoded bytes, hence the binary mode.
        with open(args.output, 'wb') as fd:
            fd.write(xml)

    sys.exit(0)


if __name__ == "__main__":
    main()