doc8/doc8/parser.py

145 lines
4.6 KiB
Python

# Copyright (C) 2014 Ivan Melnikov <iv at altlinux dot org>
#
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import errno
import os
import threading
import chardet
from docutils import frontend
from docutils import parsers as docutils_parser
from docutils import utils
import restructuredtext_lint as rl
import six
class ParsedFile(object):
    """Lazily loaded view of one file on disk.

    Constructing an instance does not touch the file.  The raw bytes, the
    decoded text, the docutils document and the lint errors are each
    produced the first time they are requested and then kept on the
    instance for later calls.
    """

    # Used by ``encoding`` when chardet reports no encoding for the bytes.
    FALLBACK_ENCODING = 'utf-8'

    def __init__(self, filename, encoding=None, default_extension=''):
        """Remember the path and options; nothing is read yet.

        :param filename: path of the file to expose.
        :param encoding: encoding used to decode the file; when falsy it
            is detected on first use (see ``encoding``).
        :param default_extension: value reported by ``extension`` when
            ``filename`` itself has no extension.
        """
        self._filename = filename
        self._encoding = encoding
        # The suffix found in the filename wins; the default only applies
        # when ``os.path.splitext`` yields an empty extension.
        self._extension = os.path.splitext(filename)[1] or default_extension
        self._read_lock = threading.Lock()
        self._has_read = False
        # Caches, filled in on demand by ``_read`` and the properties.
        self._raw_content = None
        self._lines = None
        self._content = None
        self._doc = None
        self._errors = None

    @property
    def errors(self):
        """Result of ``rl.lint`` on the decoded contents (cached)."""
        if self._errors is None:
            self._errors = rl.lint(self.contents, filepath=self.filename)
        return self._errors

    @property
    def document(self):
        """Docutils document built from the decoded contents (cached)."""
        if self._doc is not None:
            return self._doc
        # Lean on the stock rst parser for as much validation as possible
        # rather than hand-written checks; it is the same parser sphinx
        # and other tools rely on, so it should be reasonably mature.
        rst_parser = docutils_parser.get_parser_class("rst")()
        overrides = {
            'halt_level': 5,
            'report_level': 5,
            'quiet': True,
            'file_insertion_enabled': False,
            'traceback': True,
            # The dump_* switches are for development use only.
            'dump_settings': False,
            'dump_internals': False,
            'dump_transforms': False,
        }
        option_parser = frontend.OptionParser(components=[rst_parser],
                                              defaults=overrides)
        built = utils.new_document(
            source_path=self.filename,
            settings=option_parser.get_default_values())
        rst_parser.parse(self.contents, built)
        self._doc = built
        return built

    def _read(self):
        """Load the file's bytes and byte lines once, under the read lock."""
        if self._has_read:
            return
        with self._read_lock:
            # Checked again now that the lock is held: another caller may
            # have finished the read while this one was waiting.
            if self._has_read:
                return
            with open(self.filename, 'rb') as handle:
                self._lines = handle.readlines()
                # Rewind so the whole file can also be kept as one blob.
                handle.seek(0)
                self._raw_content = handle.read()
                self._has_read = True

    def lines_iter(self, remove_trailing_newline=True):
        """Yield each line of the file decoded to text.

        :param remove_trailing_newline: when true, a single trailing
            ``"\\n"`` is dropped from every yielded line.
        """
        self._read()
        for raw_line in self._lines:
            text = six.text_type(raw_line, encoding=self.encoding)
            if remove_trailing_newline and text.endswith("\n"):
                text = text[:-1]
            yield text

    @property
    def lines(self):
        """The file's lines as read in binary mode (not decoded)."""
        self._read()
        return self._lines

    @property
    def extension(self):
        """Extension of the filename, or the default given at creation."""
        return self._extension

    @property
    def filename(self):
        """Path this object was created with."""
        return self._filename

    @property
    def encoding(self):
        """Encoding used for decoding, detected with chardet if unset."""
        if self._encoding:
            return self._encoding
        guessed = chardet.detect(self.raw_contents)['encoding']
        # chardet may report no encoding at all; fall back in that case.
        self._encoding = guessed or self.FALLBACK_ENCODING
        return self._encoding

    @property
    def raw_contents(self):
        """Entire file as bytes."""
        self._read()
        return self._raw_content

    @property
    def contents(self):
        """Entire file decoded to text with ``encoding`` (cached)."""
        if self._content is not None:
            return self._content
        decoded = six.text_type(self.raw_contents, encoding=self.encoding)
        self._content = decoded
        return decoded

    def __str__(self):
        """Summarise the file: name, encoding, character and line counts."""
        name = self.filename
        codec = self.encoding
        char_count = len(self.contents)
        line_count = sum(1 for _ in self.lines_iter())
        return "%s (%s, %s chars, %s lines)" % (
            name, codec, char_count, line_count)
def parse(filename, encoding=None, default_extension=''):
    """Wrap an existing regular file in a ``ParsedFile``.

    :param filename: path of the file to wrap.
    :param encoding: passed through to ``ParsedFile`` unchanged.
    :param default_extension: passed through to ``ParsedFile`` unchanged.
    :returns: a new ``ParsedFile`` for ``filename``.
    :raises IOError: with ``errno.ENOENT`` when ``os.path.isfile`` reports
        that ``filename`` is not an existing regular file.
    """
    if os.path.isfile(filename):
        return ParsedFile(filename,
                          encoding=encoding,
                          default_extension=default_extension)
    raise IOError(errno.ENOENT, 'File not found', filename)