openstack-manuals/tools/glossary2rst.py
Andreas Jaeger a8224a68b5 Add RST Glossary
Automatically convert the glossary from DocBook XML to RST with each
build and use it for building the RST guides.

Add the glossary to User Guides and Networking Guide.

Use glossary in User Guides in places where it was used before. This
removes one TODO.

Fix CORS capitalization.

Partial-Bug: #1421813
Change-Id: I3fba6d54b01759f39f8e1467a7606cdbd2dd0165
2015-04-14 19:57:35 +00:00

176 lines
4.7 KiB
Python
Executable File

#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import re
import sys
from lxml import etree
SKIP_TAGS = ['indexterm', 'info', 'primary', 'title']
KNOWN_TAGS = [
'code',
'command',
'filename',
'link',
'literal',
'glossary',
'glossdef',
'glossdiv',
'glossentry',
'glossterm',
'para',
'phrase',
'systemitem',
'title'
]
# Marks the first glossdiv entry
FIRST_GLOSSDIV = True
def remove_indent(s):
"""Remove indention of paragraph."""
s = "\n".join(i.lstrip() for i in s.splitlines())
return s
def concat(element):
"""Concatenate element and its children."""
s = ""
if element.text is not None:
s += remove_indent(element.text)
# Add all children
for i in element.getchildren():
s += convert(i)
if i.tail is not None:
if len(s) > 0 and not s[-1].isspace() and i.tail[0] in " \t":
s += i.tail[0]
s += remove_indent(i.tail)
return s
def indent(element, indent):
"""Indent paragraph."""
start = "\n\n"
lines = [" " * indent + i for i in concat(element).splitlines()
if i and not i.isspace()]
return start + "\n".join(lines)
def link(element):
# Only handles the single link we need...
href = element.attrib['{http://www.w3.org/1999/xlink}href']
href = href.lstrip()
if href == "http://git.openstack.org/cgit/openstack/openstack-manuals":
s = ("`openstack/openstack-manuals repository " +
"<http://git.openstack.org/cgit/openstack/openstack-manuals>`__")
return s
print ("link not handled %s" % href)
sys.exit(1)
def glossdiv(element):
s = ""
global FIRST_GLOSSDIV
if FIRST_GLOSSDIV:
s = '.. glossary::\n\n'
FIRST_GLOSSDIV = False
return s + concat(element)
def glossentry(element):
s = "\n"
glossterm = element.find("{http://docbook.org/ns/docbook}glossterm")
if glossterm.text is not None:
s += " " + glossterm.text + "\n"
glossdef = element.find("{http://docbook.org/ns/docbook}glossdef")
if glossdef is not None:
s += indent(glossdef, 6)
return s
def convert(element):
if not isinstance(element.tag, basestring):
print("Element %s not handled, aborting!" % element.text)
tag = element.tag.replace('{http://docbook.org/ns/docbook}', '')
if tag in SKIP_TAGS:
return ""
if tag not in KNOWN_TAGS:
print("Tag %s not handled, aborting!" % tag)
sys.exit(1)
if tag == "command":
return ":command:`%s`" % element.text
if tag == "code":
return "`%s`" % element.text
if tag == "filename":
return ":file:`%s`" % element.text
if tag == "glossary":
return concat(element)
if tag == "glossdiv":
return glossdiv(element)
if tag == "glossentry":
return glossentry(element)
if tag == "link":
return link(element)
if tag == "literal":
return "`%s`" % element.text
if tag == "para":
return "\n\n" + concat(element)
if tag == "phrase":
return element.text
if tag == "systemitem":
return element.text
print("not handled tag %s - %s" % (element.tag, element.text))
sys.exit(1)
def rst_convert(element):
output = convert(element)
# Replace multiple empty lines with single empty line
output = re.sub(r"\n{3,}", "\n\n", output)
return output
def glossary_convert(filename):
try:
parser = etree.XMLParser(remove_comments=True)
doc = etree.parse(filename, parser=parser)
except etree.XMLSyntaxError as e:
print(" Warning: file %s is invalid XML: %s" % (filename, e))
rstcontent = rst_convert(doc.getroot()).encode('utf-8')
heading = "========\n" + "Glossary\n" + "========\n\n"
heading += ".. comments\n"
heading += " This file file is automatically generated, edit the master"
heading += " doc/glossary/glossary-terms.xml to update it."
if len(sys.argv) != 2:
print(heading + rstcontent)
else:
with open(sys.argv[1], 'w') as fp:
fp.write(heading)
fp.write(rstcontent)
if __name__ == '__main__':
glossary_convert("doc/glossary/glossary-terms.xml")