From 4b82d0ddc71e27d017498cae9248cb4c7d0020d6 Mon Sep 17 00:00:00 2001 From: Chris Dent Date: Sun, 3 Jul 2016 14:11:12 -0400 Subject: [PATCH] Clean up content-type parsing Some of the places in which not_binary was being called were not guarded in a way that made sure that the content-type being sent to not_binary was just a media-type (without charset, etc.). A parse_content_type method was extracted from the extract_content_type method so that either a headers dict or a simple header value could be parsed. This seemed more generic and contractual than making not_binary extract if required. Fixes #158 --- gabbi/case.py | 5 +++-- gabbi/tests/test_utils.py | 37 +++++++++++++++++++++++++++++++++ gabbi/utils.py | 43 ++++++++++++++++++++++----------------- 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/gabbi/case.py b/gabbi/case.py index 39475ef..42dcd85 100644 --- a/gabbi/case.py +++ b/gabbi/case.py @@ -369,7 +369,8 @@ class HTTPTestCase(unittest.TestCase): if test['data'] is not '': body = self._test_data_to_string( - test['data'], headers.get('content-type', '')) + test['data'], + utils.extract_content_type(headers, default='')[0]) else: body = '' @@ -466,7 +467,7 @@ class HTTPTestCase(unittest.TestCase): If the output is long, it is limited by either GABBI_MAX_CHARS_OUTPUT in the environment or the MAX_CHARS_OUTPUT constant. 
""" - if utils.not_binary(self.content_type): + if utils.not_binary(utils.parse_content_type(self.content_type)[0]): if expected in iterable: return diff --git a/gabbi/tests/test_utils.py b/gabbi/tests/test_utils.py index d5b8b50..e22ea96 100644 --- a/gabbi/tests/test_utils.py +++ b/gabbi/tests/test_utils.py @@ -45,6 +45,43 @@ class BinaryTypesTest(unittest.TestCase): '%s should be binary' % media_type) +class ParseContentTypeTest(unittest.TestCase): + + def test_parse_simple(self): + self.assertEqual( + ('text/plain', 'latin-1'), + utils.parse_content_type('text/plain; charset=latin-1')) + + def test_parse_extra(self): + self.assertEqual( + ('text/plain', 'latin-1'), + utils.parse_content_type( + 'text/plain; charset=latin-1; version=1.2')) + + def test_parse_default(self): + self.assertEqual( + ('text/plain', 'utf-8'), + utils.parse_content_type('text/plain')) + + def test_parse_error_default(self): + self.assertEqual( + ('text/plain', 'utf-8'), + utils.parse_content_type( + 'text/plain; face=ouch; charset=latin-1;')) + + def test_parse_nocharset_default(self): + self.assertEqual( + ('text/plain', 'utf-8'), + utils.parse_content_type( + 'text/plain; face=ouch')) + + def test_parse_override_default(self): + self.assertEqual( + ('text/plain', 'latin-1'), + utils.parse_content_type( + 'text/plain; face=ouch', default_charset='latin-1')) + + class ExtractContentTypeTest(unittest.TestCase): def test_extract_content_type_default_both(self): diff --git a/gabbi/utils.py b/gabbi/utils.py index 172b4bf..cc90732 100644 --- a/gabbi/utils.py +++ b/gabbi/utils.py @@ -68,26 +68,11 @@ def decode_response_content(header_dict, content): return content -def extract_content_type(header_dict): - """Extract content-type from headers.""" +def extract_content_type(header_dict, default='application/binary'): + """Extract parsed content-type from headers.""" content_type = header_dict.get('content-type', - 'application/binary').strip().lower() - charset = 'utf-8' - if ';' in content_type: 
- content_type, parameter_strings = (attr.strip() for attr - in content_type.split(';', 1)) - try: - parameter_pairs = [atom.strip().split('=') - for atom in parameter_strings.split(';')] - parameters = {name: value for name, value in parameter_pairs} - charset = parameters['charset'] - except (ValueError, KeyError): - # KeyError when no charset found. - # ValueError when the parameter_strings are poorly - # formed (for example trailing ;) - pass - - return (content_type, charset) + default).strip().lower() + return parse_content_type(content_type) def get_colorizer(stream): @@ -126,6 +111,26 @@ def not_binary(content_type): content_type.startswith('application/json')) +def parse_content_type(content_type, default_charset='utf-8'): + """Parse content type value for media type and charset.""" + charset = default_charset + if ';' in content_type: + content_type, parameter_strings = (attr.strip() for attr + in content_type.split(';', 1)) + try: + parameter_pairs = [atom.strip().split('=') + for atom in parameter_strings.split(';')] + parameters = {name: value for name, value in parameter_pairs} + charset = parameters['charset'] + except (ValueError, KeyError): + # KeyError when no charset found. + # ValueError when the parameter_strings are poorly + # formed (for example trailing ;) + pass + + return (content_type, charset) + + def host_info_from_target(target, prefix=None): """Turn url or host:port and target into test destination.""" force_ssl = False