Browse Source

Allow overriding file encoding

Chardet doesn't always correctly detect a file's encoding in all
circumstances. So that a user can specify the exact encoding of
their files, add a new config option and a new CLI option that
allow manually overriding the encoding that chardet would
otherwise try to determine.

If enabled, chardet detection will no longer run.

Fixes bug 1384463

Change-Id: Ie8baf3f79083e1495c7420a9d0569390cad2115e
tags/0.5.0
Joshua Harlow 4 years ago
parent
commit
04a710c687
3 changed files with 35 additions and 1 deletions
  1. 5
    0
      README.rst
  2. 14
    1
      doc8/main.py
  3. 16
    0
      doc8/tests/test_checks.py

+ 5
- 0
README.rst View File

@@ -59,6 +59,10 @@ Command line usage
59 59
       --default-extension extension
60 60
                             Default file extension to use when a file is found
61 61
                             without a file extension.
62
+      --file-encoding encoding
63
+                            Override encoding to use when attempting to determine
64
+                            an input files text encoding (providing this avoids
65
+                            using `chardet` to automatically detect encoding/s)
62 66
       --max-line-length int
63 67
                             maximum allowed line length (default: 79)
64 68
       -e extension, --extension extension
@@ -110,6 +114,7 @@ Option                 Overrides    Merges
110 114
 ``ignore-path``        No           Yes
111 115
 ``ignore``             No           Yes
112 116
 ``max-line-length``    Yes          No
117
+``file-encoding``      Yes          No
113 118
 ``sphinx``             Yes          No
114 119
 =====================  ===========  ========
115 120
 

+ 14
- 1
doc8/main.py View File

@@ -109,6 +109,10 @@ def extract_config(args):
109 109
         cfg['verbose'] = parser.getboolean("doc8", "verbose")
110 110
     except (configparser.NoSectionError, configparser.NoOptionError):
111 111
         pass
112
+    try:
113
+        cfg['file_encoding'] = parser.get("doc8", "file-encoding")
114
+    except (configparser.NoSectionError, configparser.NoOptionError):
115
+        pass
112 116
     try:
113 117
         cfg['default_extension'] = parser.get("doc8", "default-extension")
114 118
     except (configparser.NoSectionError, configparser.NoOptionError):
@@ -160,6 +164,7 @@ def scan(cfg):
160 164
     file_iter = utils.find_files(cfg.get('paths', []),
161 165
                                  cfg.get('extension', []), ignored_paths)
162 166
     default_extension = cfg.get('default_extension')
167
+    file_encoding = cfg.get('file_encoding')
163 168
     for filename, ignoreable in file_iter:
164 169
         if ignoreable:
165 170
             files_ignored += 1
@@ -167,7 +172,8 @@ def scan(cfg):
167 172
                 print("  Ignoring '%s'" % (filename))
168 173
         else:
169 174
             f = file_parser.parse(filename,
170
-                                  default_extension=default_extension)
175
+                                  default_extension=default_extension,
176
+                                  encoding=file_encoding)
171 177
             files.append(f)
172 178
             if cfg.get('verbose'):
173 179
                 print("  Selecting '%s'" % (filename))
@@ -275,6 +281,13 @@ def main():
275 281
                              " found without a file extension.",
276 282
                         default='', dest='default_extension',
277 283
                         metavar='extension')
284
+    parser.add_argument("--file-encoding", action="store",
285
+                        help="Override encoding to use when attempting"
286
+                             " to determine an input files text encoding "
287
+                             "(providing this avoids using `chardet` to"
288
+                             " automatically detect encoding/s)",
289
+                        default='', dest='file_encoding',
290
+                        metavar='encoding')
278 291
     parser.add_argument("--max-line-length", action="store", metavar="int",
279 292
                         type=int,
280 293
                         help="Maximum allowed line"

+ 16
- 0
doc8/tests/test_checks.py View File

@@ -89,6 +89,22 @@ test
89 89
                 (line, code, msg) = errors[0]
90 90
                 self.assertIn(code, check.REPORTS)
91 91
 
92
+    def test_correct_length(self):
93
+        conf = {
94
+            'max_line_length': 79,
95
+            'allow_long_titles': True,
96
+        }
97
+        with tempfile.NamedTemporaryFile(suffix='.rst') as fh:
98
+            fh.write(b'known exploit in the wild, for example'
99
+                     ' \xe2\x80\x93 the time'
100
+                     ' between advance notification')
101
+            fh.flush()
102
+
103
+            parsed_file = parser.ParsedFile(fh.name, encoding='utf-8')
104
+            check = checks.CheckMaxLineLength(conf)
105
+            errors = list(check.report_iter(parsed_file))
106
+            self.assertEqual(0, len(errors))
107
+
92 108
     def test_unsplittable_length(self):
93 109
         content = """
94 110
 ===

Loading…
Cancel
Save