releases/openstack_releases/rst2txt.py
Doug Hellmann 6cf9010df5 handle reno 2.9.0 anchor references
Reno produces anchor references for sections automatically now, and
those don't have URIs set up so we want to ignore them. If we do have
another target that does have a URI we want to include that, so test
whether we have the URI before including the target info in the
output.

Change-Id: I84f1b67b3078662f72c09c0f69bd09a45a7fe83e
Signed-off-by: Doug Hellmann <doug@doughellmann.com>
2018-04-25 15:20:30 -04:00

973 lines
26 KiB
Python

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Convert reStructuredText to plain text.
Most of this code is adapted from sphinx.writers.text, Copyright
2007-2015 by the Sphinx team (see
git://github.com/sphinx-doc/sphinx). The code is imported here because
it has to be modified to not rely on other internal Sphinx components
that we don't have set up, such as a Builder and Application object.
"""
import itertools
import os
import re
import textwrap
from docutils import core
from docutils import nodes
from docutils.utils import column_width
from docutils import writers
from six.moves import zip_longest
from sphinx import addnodes
def convert(source):
"""Given reStructuredText input, return formatted plain text output."""
return core.publish_string(
source=source,
writer=TextWriter(),
)
class TextWrapper(textwrap.TextWrapper):
"""Custom subclass that uses a different word separator regex."""
wordsep_re = re.compile(
r'(\s+|' # any whitespace
r'(?<=\s)(?::[a-z-]+:)?`\S+|' # interpreted text start
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
def _wrap_chunks(self, chunks):
"""_wrap_chunks(chunks : [string]) -> [string]
The original _wrap_chunks uses len() to calculate width.
This method respects wide/fullwidth characters for width adjustment.
"""
drop_whitespace = getattr(self, 'drop_whitespace', True) # py25 compat
lines = []
if self.width <= 0:
raise ValueError("invalid width %r (must be > 0)" % self.width)
chunks.reverse()
while chunks:
cur_line = []
cur_len = 0
if lines:
indent = self.subsequent_indent
else:
indent = self.initial_indent
width = self.width - column_width(indent)
if drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]
while chunks:
l = column_width(chunks[-1])
if cur_len + l <= width:
cur_line.append(chunks.pop())
cur_len += l
else:
break
if chunks and column_width(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
if drop_whitespace and cur_line and cur_line[-1].strip() == '':
del cur_line[-1]
if cur_line:
lines.append(indent + ''.join(cur_line))
return lines
def _break_word(self, word, space_left):
"""_break_word(word : string, space_left : int) -> (string, string)
Break line by unicode width instead of len(word).
"""
total = 0
for i, c in enumerate(word):
total += column_width(c)
if total > space_left:
return word[:i - 1], word[i - 1:]
return word, ''
def _split(self, text):
"""_split(text : string) -> [string]
Override original method that only split by 'wordsep_re'.
This '_split' split wide-characters into chunk by one character.
"""
def split(t):
return textwrap.TextWrapper._split(self, t)
chunks = []
for chunk in split(text):
for w, g in itertools.groupby(chunk, column_width):
if w == 1:
chunks.extend(split(''.join(g)))
else:
chunks.extend(list(g))
return chunks
def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""Deal with long words
_handle_long_word(chunks : [string],
cur_line : [string],
cur_len : int, width : int)
Override original method for using self._break_word() instead
of slice.
"""
space_left = max(width - cur_len, 1)
if self.break_long_words:
l, r = self._break_word(reversed_chunks[-1], space_left)
cur_line.append(l)
reversed_chunks[-1] = r
elif not cur_line:
cur_line.append(reversed_chunks.pop())
MAXWIDTH = 70
STDINDENT = 3
def my_wrap(text, width=MAXWIDTH, **kwargs):
w = TextWrapper(width=width, **kwargs)
return w.wrap(text)
class TextWriter(writers.Writer):
supported = ('text',)
settings_spec = ('No options here.', '', ())
settings_defaults = {}
output = None
def __init__(self):
writers.Writer.__init__(self)
self.translator_class = TextTranslator
def translate(self):
visitor = self.translator_class(self.document)
self.document.walkabout(visitor)
self.output = visitor.body
class TextTranslator(nodes.NodeVisitor):
sectionchars = '*=-~"+`'
def __init__(self, document):
nodes.NodeVisitor.__init__(self, document)
self.nl = os.linesep
self.states = [[]]
self.stateindent = [0]
self.list_counter = []
self.sectionlevel = 0
self.lineblocklevel = 0
self.table = None
def add_text(self, text):
self.states[-1].append((-1, text))
def new_state(self, indent=STDINDENT):
self.states.append([])
self.stateindent.append(indent)
def end_state(self, wrap=True, end=[''], first=None):
content = self.states.pop()
maxindent = sum(self.stateindent)
indent = self.stateindent.pop()
result = []
toformat = []
def do_format():
if not toformat:
return
if wrap:
res = my_wrap(''.join(toformat), width=MAXWIDTH - maxindent)
else:
res = ''.join(toformat).splitlines()
if end:
res += end
result.append((indent, res))
for itemindent, item in content:
if itemindent == -1:
toformat.append(item)
else:
do_format()
result.append((indent + itemindent, item))
toformat = []
do_format()
if first is not None and result:
itemindent, item = result[0]
result_rest, result = result[1:], []
if item:
toformat = [first + ' '.join(item)]
do_format() # re-create `result` from `toformat`
_dummy, new_item = result[0]
result.insert(0, (itemindent - indent, [new_item[0]]))
result[1] = (itemindent, new_item[1:])
result.extend(result_rest)
self.states[-1].extend(result)
def visit_document(self, node):
self.new_state(0)
def depart_document(self, node):
self.end_state()
self.body = self.nl.join(line and (' ' * indent + line)
for indent, lines in self.states[0]
for line in lines)
# XXX header/footer?
def visit_highlightlang(self, node):
raise nodes.SkipNode
def visit_section(self, node):
self._title_char = self.sectionchars[self.sectionlevel]
self.sectionlevel += 1
def depart_section(self, node):
self.sectionlevel -= 1
def visit_topic(self, node):
self.new_state(0)
def depart_topic(self, node):
self.end_state()
visit_sidebar = visit_topic
depart_sidebar = depart_topic
def visit_rubric(self, node):
self.new_state(0)
self.add_text('-[ ')
def depart_rubric(self, node):
self.add_text(' ]-')
self.end_state()
def visit_compound(self, node):
pass
def depart_compound(self, node):
pass
def visit_glossary(self, node):
pass
def depart_glossary(self, node):
pass
def visit_title(self, node):
if isinstance(node.parent, nodes.Admonition):
self.add_text(node.astext() + ': ')
raise nodes.SkipNode
self.new_state(0)
def depart_title(self, node):
if isinstance(node.parent, nodes.section):
char = self._title_char
else:
char = '^'
text = ''.join(x[1] for x in self.states.pop() if x[0] == -1)
self.stateindent.pop()
self.states[-1].append(
(0, ['', text, '%s' % (char * column_width(text)), '']))
def visit_subtitle(self, node):
pass
def depart_subtitle(self, node):
pass
def visit_attribution(self, node):
self.add_text('-- ')
def depart_attribution(self, node):
pass
def visit_desc(self, node):
pass
def depart_desc(self, node):
pass
def visit_desc_signature(self, node):
self.new_state(0)
if node.parent['objtype'] in ('class', 'exception'):
self.add_text('%s ' % node.parent['objtype'])
def depart_desc_signature(self, node):
# XXX: wrap signatures in a way that makes sense
self.end_state(wrap=False, end=None)
def visit_desc_name(self, node):
pass
def depart_desc_name(self, node):
pass
def visit_desc_addname(self, node):
pass
def depart_desc_addname(self, node):
pass
def visit_desc_type(self, node):
pass
def depart_desc_type(self, node):
pass
def visit_desc_returns(self, node):
self.add_text(' -> ')
def depart_desc_returns(self, node):
pass
def visit_desc_parameterlist(self, node):
self.add_text('(')
self.first_param = 1
def depart_desc_parameterlist(self, node):
self.add_text(')')
def visit_desc_parameter(self, node):
if not self.first_param:
self.add_text(', ')
else:
self.first_param = 0
self.add_text(node.astext())
raise nodes.SkipNode
def visit_desc_optional(self, node):
self.add_text('[')
def depart_desc_optional(self, node):
self.add_text(']')
def visit_desc_annotation(self, node):
pass
def depart_desc_annotation(self, node):
pass
def visit_desc_content(self, node):
self.new_state()
self.add_text(self.nl)
def depart_desc_content(self, node):
self.end_state()
def visit_figure(self, node):
self.new_state()
def depart_figure(self, node):
self.end_state()
def visit_caption(self, node):
pass
def depart_caption(self, node):
pass
def visit_productionlist(self, node):
self.new_state()
names = []
for production in node:
names.append(production['tokenname'])
maxlen = max(len(name) for name in names)
lastname = None
for production in node:
if production['tokenname']:
self.add_text(production['tokenname'].ljust(maxlen) + ' ::=')
lastname = production['tokenname']
elif lastname is not None:
self.add_text('%s ' % (' ' * len(lastname)))
self.add_text(production.astext() + self.nl)
self.end_state(wrap=False)
raise nodes.SkipNode
def visit_footnote(self, node):
self._footnote = node.children[0].astext().strip()
self.new_state(len(self._footnote) + 3)
def depart_footnote(self, node):
self.end_state(first='[%s] ' % self._footnote)
def visit_citation(self, node):
if len(node) and isinstance(node[0], nodes.label):
self._citlabel = node[0].astext()
else:
self._citlabel = ''
self.new_state(len(self._citlabel) + 3)
def depart_citation(self, node):
self.end_state(first='[%s] ' % self._citlabel)
def visit_label(self, node):
raise nodes.SkipNode
def visit_legend(self, node):
pass
def depart_legend(self, node):
pass
# XXX: option list could use some better styling
def visit_option_list(self, node):
pass
def depart_option_list(self, node):
pass
def visit_option_list_item(self, node):
self.new_state(0)
def depart_option_list_item(self, node):
self.end_state()
def visit_option_group(self, node):
self._firstoption = True
def depart_option_group(self, node):
self.add_text(' ')
def visit_option(self, node):
if self._firstoption:
self._firstoption = False
else:
self.add_text(', ')
def depart_option(self, node):
pass
def visit_option_string(self, node):
pass
def depart_option_string(self, node):
pass
def visit_option_argument(self, node):
self.add_text(node['delimiter'])
def depart_option_argument(self, node):
pass
def visit_description(self, node):
pass
def depart_description(self, node):
pass
def visit_tabular_col_spec(self, node):
raise nodes.SkipNode
def visit_colspec(self, node):
self.table[0].append(node['colwidth'])
raise nodes.SkipNode
def visit_tgroup(self, node):
pass
def depart_tgroup(self, node):
pass
def visit_thead(self, node):
pass
def depart_thead(self, node):
pass
def visit_tbody(self, node):
self.table.append('sep')
def depart_tbody(self, node):
pass
def visit_row(self, node):
self.table.append([])
def depart_row(self, node):
pass
def visit_entry(self, node):
if 'morerows' in node or 'morecols' in node:
raise NotImplementedError('Column or row spanning cells are '
'not implemented.')
self.new_state(0)
def depart_entry(self, node):
text = self.nl.join(self.nl.join(x[1]) for x in self.states.pop())
self.stateindent.pop()
self.table[-1].append(text)
def visit_table(self, node):
if self.table:
raise NotImplementedError('Nested tables are not supported.')
self.new_state(0)
self.table = [[]]
def depart_table(self, node):
lines = self.table[1:]
fmted_rows = []
colwidths = self.table[0]
realwidths = colwidths[:]
separator = 0
# don't allow paragraphs in table cells for now
for line in lines:
if line == 'sep':
separator = len(fmted_rows)
else:
cells = []
for i, cell in enumerate(line):
par = my_wrap(cell, width=colwidths[i])
if par:
maxwidth = max(column_width(x) for x in par)
else:
maxwidth = 0
realwidths[i] = max(realwidths[i], maxwidth)
cells.append(par)
fmted_rows.append(cells)
def writesep(char='-'):
out = ['+']
for width in realwidths:
out.append(char * (width + 2))
out.append('+')
self.add_text(''.join(out) + self.nl)
def writerow(row):
lines = zip_longest(*row)
for line in lines:
out = ['|']
for i, cell in enumerate(line):
if cell:
adjust_len = len(cell) - column_width(cell)
out.append(' ' + cell.ljust(
realwidths[i] + 1 + adjust_len))
else:
out.append(' ' * (realwidths[i] + 2))
out.append('|')
self.add_text(''.join(out) + self.nl)
for i, row in enumerate(fmted_rows):
if separator and i == separator:
writesep('=')
else:
writesep('-')
writerow(row)
writesep('-')
self.table = None
self.end_state(wrap=False)
def visit_acks(self, node):
self.new_state(0)
self.add_text(
', '.join(n.astext() for n in node.children[0].children) +
'.')
self.end_state()
raise nodes.SkipNode
def visit_image(self, node):
if 'alt' in node.attributes:
self.add_text('[image: %s]' % node['alt'])
self.add_text('[image]')
raise nodes.SkipNode
def visit_transition(self, node):
indent = sum(self.stateindent)
self.new_state(0)
self.add_text('=' * (MAXWIDTH - indent))
self.end_state()
raise nodes.SkipNode
def visit_bullet_list(self, node):
self.list_counter.append(-1)
def depart_bullet_list(self, node):
self.list_counter.pop()
def visit_enumerated_list(self, node):
self.list_counter.append(node.get('start', 1) - 1)
def depart_enumerated_list(self, node):
self.list_counter.pop()
def visit_definition_list(self, node):
self.list_counter.append(-2)
def depart_definition_list(self, node):
self.list_counter.pop()
def visit_list_item(self, node):
if self.list_counter[-1] == -1:
# bullet list
self.new_state(2)
elif self.list_counter[-1] == -2:
# definition list
pass
else:
# enumerated list
self.list_counter[-1] += 1
self.new_state(len(str(self.list_counter[-1])) + 2)
def depart_list_item(self, node):
if self.list_counter[-1] == -1:
self.end_state(first='* ')
elif self.list_counter[-1] == -2:
pass
else:
self.end_state(first='%s. ' % self.list_counter[-1])
def visit_definition_list_item(self, node):
self._li_has_classifier = len(node) >= 2 and \
isinstance(node[1], nodes.classifier)
def depart_definition_list_item(self, node):
pass
def visit_term(self, node):
self.new_state(0)
def depart_term(self, node):
if not self._li_has_classifier:
self.end_state(end=None)
def visit_termsep(self, node):
self.add_text(', ')
raise nodes.SkipNode
def visit_classifier(self, node):
self.add_text(' : ')
def depart_classifier(self, node):
self.end_state(end=None)
def visit_definition(self, node):
self.new_state()
def depart_definition(self, node):
self.end_state()
def visit_field_list(self, node):
pass
def depart_field_list(self, node):
pass
def visit_field(self, node):
pass
def depart_field(self, node):
pass
def visit_field_name(self, node):
self.new_state(0)
def depart_field_name(self, node):
self.add_text(':')
self.end_state(end=None)
def visit_field_body(self, node):
self.new_state()
def depart_field_body(self, node):
self.end_state()
def visit_centered(self, node):
pass
def depart_centered(self, node):
pass
def visit_hlist(self, node):
pass
def depart_hlist(self, node):
pass
def visit_hlistcol(self, node):
pass
def depart_hlistcol(self, node):
pass
def visit_admonition(self, node):
self.new_state(0)
def depart_admonition(self, node):
self.end_state()
def _visit_admonition(self, node):
self.new_state(2)
def _make_depart_admonition(name):
def depart_admonition(self, node):
self.end_state(first=name.title() + ': ')
return depart_admonition
visit_attention = _visit_admonition
depart_attention = _make_depart_admonition('attention')
visit_caution = _visit_admonition
depart_caution = _make_depart_admonition('caution')
visit_danger = _visit_admonition
depart_danger = _make_depart_admonition('danger')
visit_error = _visit_admonition
depart_error = _make_depart_admonition('error')
visit_hint = _visit_admonition
depart_hint = _make_depart_admonition('hint')
visit_important = _visit_admonition
depart_important = _make_depart_admonition('important')
visit_note = _visit_admonition
depart_note = _make_depart_admonition('note')
visit_tip = _visit_admonition
depart_tip = _make_depart_admonition('tip')
visit_warning = _visit_admonition
depart_warning = _make_depart_admonition('warning')
visit_seealso = _visit_admonition
depart_seealso = _make_depart_admonition('seealso')
def visit_versionmodified(self, node):
self.new_state(0)
def depart_versionmodified(self, node):
self.end_state()
def visit_literal_block(self, node):
self.new_state()
def depart_literal_block(self, node):
self.end_state(wrap=False)
def visit_doctest_block(self, node):
self.new_state(0)
def depart_doctest_block(self, node):
self.end_state(wrap=False)
def visit_line_block(self, node):
self.new_state()
self.lineblocklevel += 1
def depart_line_block(self, node):
self.lineblocklevel -= 1
self.end_state(wrap=False, end=None)
if not self.lineblocklevel:
self.add_text('\n')
def visit_line(self, node):
pass
def depart_line(self, node):
self.add_text('\n')
def visit_block_quote(self, node):
self.new_state()
def depart_block_quote(self, node):
self.end_state()
def visit_compact_paragraph(self, node):
pass
def depart_compact_paragraph(self, node):
pass
def visit_paragraph(self, node):
if not isinstance(node.parent, nodes.Admonition) or \
isinstance(node.parent, addnodes.seealso):
self.new_state(0)
def depart_paragraph(self, node):
if not isinstance(node.parent, nodes.Admonition) or \
isinstance(node.parent, addnodes.seealso):
self.end_state()
def visit_target(self, node):
pass
def depart_target(self, node):
# Reno produces anchor references for sections automatically
# now, and those don't have URIs set up so we want to ignore
# them. If we do have another target that does have a URI we
# want to include that, so test whether we have the URI before
# including the target info in the output.
if 'refuri' in node:
self.add_text(' (%s)' % node['refuri'])
def visit_index(self, node):
raise nodes.SkipNode
def visit_toctree(self, node):
raise nodes.SkipNode
def visit_substitution_definition(self, node):
raise nodes.SkipNode
def visit_pending_xref(self, node):
pass
def depart_pending_xref(self, node):
pass
def visit_reference(self, node):
pass
def depart_reference(self, node):
pass
def visit_number_reference(self, node):
text = nodes.Text(node.get('title', '#'))
self.visit_Text(text)
raise nodes.SkipNode
def visit_download_reference(self, node):
pass
def depart_download_reference(self, node):
pass
def visit_emphasis(self, node):
self.add_text('*')
def depart_emphasis(self, node):
self.add_text('*')
def visit_literal_emphasis(self, node):
self.add_text('*')
def depart_literal_emphasis(self, node):
self.add_text('*')
def visit_strong(self, node):
self.add_text('**')
def depart_strong(self, node):
self.add_text('**')
def visit_literal_strong(self, node):
self.add_text('**')
def depart_literal_strong(self, node):
self.add_text('**')
def visit_abbreviation(self, node):
self.add_text('')
def depart_abbreviation(self, node):
if node.hasattr('explanation'):
self.add_text(' (%s)' % node['explanation'])
def visit_title_reference(self, node):
self.add_text('*')
def depart_title_reference(self, node):
self.add_text('*')
def visit_literal(self, node):
self.add_text('"')
def depart_literal(self, node):
self.add_text('"')
def visit_subscript(self, node):
self.add_text('_')
def depart_subscript(self, node):
pass
def visit_superscript(self, node):
self.add_text('^')
def depart_superscript(self, node):
pass
def visit_footnote_reference(self, node):
self.add_text('[%s]' % node.astext())
raise nodes.SkipNode
def visit_citation_reference(self, node):
self.add_text('[%s]' % node.astext())
raise nodes.SkipNode
def visit_Text(self, node):
self.add_text(node.astext())
def depart_Text(self, node):
pass
def visit_generated(self, node):
pass
def depart_generated(self, node):
pass
def visit_inline(self, node):
if 'xref' in node['classes'] or 'term' in node['classes']:
self.add_text('*')
def depart_inline(self, node):
if 'xref' in node['classes'] or 'term' in node['classes']:
self.add_text('*')
def visit_container(self, node):
pass
def depart_container(self, node):
pass
def visit_problematic(self, node):
pass
def depart_problematic(self, node):
pass
def visit_system_message(self, node):
raise nodes.SkipNode
def visit_comment(self, node):
raise nodes.SkipNode
def visit_meta(self, node):
# only valid for HTML
raise nodes.SkipNode
def visit_raw(self, node):
if 'text' in node.get('format', '').split():
self.body.append(node.astext())
raise nodes.SkipNode
def visit_math(self, node):
raise nodes.SkipNode
visit_math_block = visit_math
def unknown_visit(self, node):
raise NotImplementedError('Unknown node: ' + node.__class__.__name__)