Clean up content-type parsing

Some callers of not_binary did not ensure that the content-type
they passed to it was just a media-type (without charset etc.).

A parse_content_type function was extracted from the
extract_content_type function so that either a headers dict or a simple
header value can be parsed.

This seemed more generic, and a clearer contract, than making not_binary
extract the media-type itself when required.

Fixes #158
This commit is contained in:
Chris Dent 2016-07-03 14:11:12 -04:00
parent eabee1f83d
commit 4b82d0ddc7
3 changed files with 64 additions and 21 deletions

View File

@ -369,7 +369,8 @@ class HTTPTestCase(unittest.TestCase):
if test['data'] is not '': if test['data'] is not '':
body = self._test_data_to_string( body = self._test_data_to_string(
test['data'], headers.get('content-type', '')) test['data'],
utils.extract_content_type(headers, default='')[0])
else: else:
body = '' body = ''
@ -466,7 +467,7 @@ class HTTPTestCase(unittest.TestCase):
If the output is long, it is limited by either GABBI_MAX_CHARS_OUTPUT If the output is long, it is limited by either GABBI_MAX_CHARS_OUTPUT
in the environment or the MAX_CHARS_OUTPUT constant. in the environment or the MAX_CHARS_OUTPUT constant.
""" """
if utils.not_binary(self.content_type): if utils.not_binary(utils.parse_content_type(self.content_type)[0]):
if expected in iterable: if expected in iterable:
return return

View File

@ -45,6 +45,43 @@ class BinaryTypesTest(unittest.TestCase):
'%s should be binary' % media_type) '%s should be binary' % media_type)
class ParseContentTypeTest(unittest.TestCase):
    """Check utils.parse_content_type against raw header values."""

    def test_parse_simple(self):
        """A lone charset parameter is returned with the media type."""
        parsed = utils.parse_content_type('text/plain; charset=latin-1')
        self.assertEqual(('text/plain', 'latin-1'), parsed)

    def test_parse_extra(self):
        """Parameters other than charset do not disturb the result."""
        header_value = 'text/plain; charset=latin-1; version=1.2'
        parsed = utils.parse_content_type(header_value)
        self.assertEqual(('text/plain', 'latin-1'), parsed)

    def test_parse_default(self):
        """With no parameters at all the charset is utf-8."""
        parsed = utils.parse_content_type('text/plain')
        self.assertEqual(('text/plain', 'utf-8'), parsed)

    def test_parse_error_default(self):
        """A trailing ';' is poorly formed, so the default charset wins."""
        header_value = 'text/plain; face=ouch; charset=latin-1;'
        parsed = utils.parse_content_type(header_value)
        self.assertEqual(('text/plain', 'utf-8'), parsed)

    def test_parse_nocharset_default(self):
        """Parameters without a charset yield the default charset."""
        parsed = utils.parse_content_type('text/plain; face=ouch')
        self.assertEqual(('text/plain', 'utf-8'), parsed)

    def test_parse_override_default(self):
        """The caller may choose the charset used as the fallback."""
        parsed = utils.parse_content_type(
            'text/plain; face=ouch', default_charset='latin-1')
        self.assertEqual(('text/plain', 'latin-1'), parsed)
class ExtractContentTypeTest(unittest.TestCase): class ExtractContentTypeTest(unittest.TestCase):
def test_extract_content_type_default_both(self): def test_extract_content_type_default_both(self):

View File

@ -68,26 +68,11 @@ def decode_response_content(header_dict, content):
return content return content
def extract_content_type(header_dict): def extract_content_type(header_dict, default='application/binary'):
"""Extract content-type from headers.""" """Extract parsed content-type from headers."""
content_type = header_dict.get('content-type', content_type = header_dict.get('content-type',
'application/binary').strip().lower() default).strip().lower()
charset = 'utf-8' return parse_content_type(content_type)
if ';' in content_type:
content_type, parameter_strings = (attr.strip() for attr
in content_type.split(';', 1))
try:
parameter_pairs = [atom.strip().split('=')
for atom in parameter_strings.split(';')]
parameters = {name: value for name, value in parameter_pairs}
charset = parameters['charset']
except (ValueError, KeyError):
# KeyError when no charset found.
# ValueError when the parameter_strings are poorly
# formed (for example trailing ;)
pass
return (content_type, charset)
def get_colorizer(stream): def get_colorizer(stream):
@ -126,6 +111,26 @@ def not_binary(content_type):
content_type.startswith('application/json')) content_type.startswith('application/json'))
def parse_content_type(content_type, default_charset='utf-8'):
    """Parse content type value for media type and charset.

    :param content_type: A raw ``Content-Type`` header value, for example
                         ``text/plain; charset=latin-1``.
    :param default_charset: The charset to report when the value has no
                            usable ``charset`` parameter.
    :returns: A ``(media_type, charset)`` tuple. The media type has any
              surrounding whitespace removed.

    If any parameter is poorly formed (for example a trailing ``;``) all
    of the parameters are ignored and ``default_charset`` is returned.
    """
    media_type, separator, parameter_strings = content_type.partition(';')
    media_type = media_type.strip()
    if not separator:
        return (media_type, default_charset)

    try:
        parameters = {}
        for atom in parameter_strings.split(';'):
            # ValueError here when the atom is not exactly name=value.
            name, value = atom.split('=')
            # Parameter names are matched case-insensitively and without
            # regard to whitespace around the '='.
            parameters[name.strip().lower()] = value.strip()
        charset = parameters['charset']
    except (ValueError, KeyError):
        # KeyError when no charset found.
        # ValueError when the parameter_strings are poorly
        # formed (for example trailing ;)
        return (media_type, default_charset)

    # A charset may be sent as a quoted-string: charset="utf-8".
    if len(charset) >= 2 and charset[0] == '"' and charset[-1] == '"':
        charset = charset[1:-1]

    # An empty charset is of no use to callers; prefer the default.
    return (media_type, charset or default_charset)
def host_info_from_target(target, prefix=None): def host_info_from_target(target, prefix=None):
"""Turn url or host:port and target into test destination.""" """Turn url or host:port and target into test destination."""
force_ssl = False force_ssl = False