Allow overriding file encoding
Chardet doesn't always seem to detect a file's encoding correctly in all circumstances. So that a user can specify the exact encoding of their files, add a new config option and a new CLI option that allow manually overriding the encoding that chardet would otherwise try to determine. If the override is provided, chardet detection will no longer run. Fixes bug 1384463 Change-Id: Ie8baf3f79083e1495c7420a9d0569390cad2115e
This commit is contained in:
parent
8b8f22329b
commit
04a710c687
|
@ -59,6 +59,10 @@ Command line usage
|
||||||
--default-extension extension
|
--default-extension extension
|
||||||
Default file extension to use when a file is found
|
Default file extension to use when a file is found
|
||||||
without a file extension.
|
without a file extension.
|
||||||
|
--file-encoding encoding
|
||||||
|
Override encoding to use when attempting to determine
|
||||||
|
an input file's text encoding (providing this avoids
|
||||||
|
using `chardet` to automatically detect encoding/s)
|
||||||
--max-line-length int
|
--max-line-length int
|
||||||
maximum allowed line length (default: 79)
|
maximum allowed line length (default: 79)
|
||||||
-e extension, --extension extension
|
-e extension, --extension extension
|
||||||
|
@ -110,6 +114,7 @@ Option Overrides Merges
|
||||||
``ignore-path`` No Yes
|
``ignore-path`` No Yes
|
||||||
``ignore`` No Yes
|
``ignore`` No Yes
|
||||||
``max-line-length`` Yes No
|
``max-line-length`` Yes No
|
||||||
|
``file-encoding`` Yes No
|
||||||
``sphinx`` Yes No
|
``sphinx`` Yes No
|
||||||
===================== =========== ========
|
===================== =========== ========
|
||||||
|
|
||||||
|
|
15
doc8/main.py
15
doc8/main.py
|
@ -109,6 +109,10 @@ def extract_config(args):
|
||||||
cfg['verbose'] = parser.getboolean("doc8", "verbose")
|
cfg['verbose'] = parser.getboolean("doc8", "verbose")
|
||||||
except (configparser.NoSectionError, configparser.NoOptionError):
|
except (configparser.NoSectionError, configparser.NoOptionError):
|
||||||
pass
|
pass
|
||||||
|
try:
|
||||||
|
cfg['file_encoding'] = parser.get("doc8", "file-encoding")
|
||||||
|
except (configparser.NoSectionError, configparser.NoOptionError):
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
cfg['default_extension'] = parser.get("doc8", "default-extension")
|
cfg['default_extension'] = parser.get("doc8", "default-extension")
|
||||||
except (configparser.NoSectionError, configparser.NoOptionError):
|
except (configparser.NoSectionError, configparser.NoOptionError):
|
||||||
|
@ -160,6 +164,7 @@ def scan(cfg):
|
||||||
file_iter = utils.find_files(cfg.get('paths', []),
|
file_iter = utils.find_files(cfg.get('paths', []),
|
||||||
cfg.get('extension', []), ignored_paths)
|
cfg.get('extension', []), ignored_paths)
|
||||||
default_extension = cfg.get('default_extension')
|
default_extension = cfg.get('default_extension')
|
||||||
|
file_encoding = cfg.get('file_encoding')
|
||||||
for filename, ignoreable in file_iter:
|
for filename, ignoreable in file_iter:
|
||||||
if ignoreable:
|
if ignoreable:
|
||||||
files_ignored += 1
|
files_ignored += 1
|
||||||
|
@ -167,7 +172,8 @@ def scan(cfg):
|
||||||
print(" Ignoring '%s'" % (filename))
|
print(" Ignoring '%s'" % (filename))
|
||||||
else:
|
else:
|
||||||
f = file_parser.parse(filename,
|
f = file_parser.parse(filename,
|
||||||
default_extension=default_extension)
|
default_extension=default_extension,
|
||||||
|
encoding=file_encoding)
|
||||||
files.append(f)
|
files.append(f)
|
||||||
if cfg.get('verbose'):
|
if cfg.get('verbose'):
|
||||||
print(" Selecting '%s'" % (filename))
|
print(" Selecting '%s'" % (filename))
|
||||||
|
@ -275,6 +281,13 @@ def main():
|
||||||
" found without a file extension.",
|
" found without a file extension.",
|
||||||
default='', dest='default_extension',
|
default='', dest='default_extension',
|
||||||
metavar='extension')
|
metavar='extension')
|
||||||
|
parser.add_argument("--file-encoding", action="store",
|
||||||
|
help="Override encoding to use when attempting"
|
||||||
|
" to determine an input files text encoding "
|
||||||
|
"(providing this avoids using `chardet` to"
|
||||||
|
" automatically detect encoding/s)",
|
||||||
|
default='', dest='file_encoding',
|
||||||
|
metavar='encoding')
|
||||||
parser.add_argument("--max-line-length", action="store", metavar="int",
|
parser.add_argument("--max-line-length", action="store", metavar="int",
|
||||||
type=int,
|
type=int,
|
||||||
help="Maximum allowed line"
|
help="Maximum allowed line"
|
||||||
|
|
|
@ -89,6 +89,22 @@ test
|
||||||
(line, code, msg) = errors[0]
|
(line, code, msg) = errors[0]
|
||||||
self.assertIn(code, check.REPORTS)
|
self.assertIn(code, check.REPORTS)
|
||||||
|
|
||||||
|
def test_correct_length(self):
|
||||||
|
conf = {
|
||||||
|
'max_line_length': 79,
|
||||||
|
'allow_long_titles': True,
|
||||||
|
}
|
||||||
|
with tempfile.NamedTemporaryFile(suffix='.rst') as fh:
|
||||||
|
fh.write(b'known exploit in the wild, for example'
|
||||||
|
' \xe2\x80\x93 the time'
|
||||||
|
' between advance notification')
|
||||||
|
fh.flush()
|
||||||
|
|
||||||
|
parsed_file = parser.ParsedFile(fh.name, encoding='utf-8')
|
||||||
|
check = checks.CheckMaxLineLength(conf)
|
||||||
|
errors = list(check.report_iter(parsed_file))
|
||||||
|
self.assertEqual(0, len(errors))
|
||||||
|
|
||||||
def test_unsplittable_length(self):
|
def test_unsplittable_length(self):
|
||||||
content = """
|
content = """
|
||||||
===
|
===
|
||||||
|
|
Loading…
Reference in New Issue