From 4b82d0ddc71e27d017498cae9248cb4c7d0020d6 Mon Sep 17 00:00:00 2001
From: Chris Dent <cdent@anticdent.org>
Date: Sun, 3 Jul 2016 14:11:12 -0400
Subject: [PATCH] Clean up content-type parsing

Some of the places where not_binary was being called did not make
sure that the content-type passed to it was just a media-type
(without charset or other parameters).

A parse_content_type function was extracted from the
extract_content_type function so that either a headers dict or a
simple header value could be parsed.

This seemed more generic and contractual than making not_binary
extract the media-type itself when required.

Fixes #158
---
 gabbi/case.py             |  5 +++--
 gabbi/tests/test_utils.py | 37 +++++++++++++++++++++++++++++++++
 gabbi/utils.py            | 43 ++++++++++++++++++++++-----------------
 3 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/gabbi/case.py b/gabbi/case.py
index 39475ef..42dcd85 100644
--- a/gabbi/case.py
+++ b/gabbi/case.py
@@ -369,7 +369,8 @@ class HTTPTestCase(unittest.TestCase):
 
         if test['data'] is not '':
             body = self._test_data_to_string(
-                test['data'], headers.get('content-type', ''))
+                test['data'],
+                utils.extract_content_type(headers, default='')[0])
         else:
             body = ''
 
@@ -466,7 +467,7 @@ class HTTPTestCase(unittest.TestCase):
         If the output is long, it is limited by either GABBI_MAX_CHARS_OUTPUT
         in the environment or the MAX_CHARS_OUTPUT constant.
         """
-        if utils.not_binary(self.content_type):
+        if utils.not_binary(utils.parse_content_type(self.content_type)[0]):
             if expected in iterable:
                 return
 
diff --git a/gabbi/tests/test_utils.py b/gabbi/tests/test_utils.py
index d5b8b50..e22ea96 100644
--- a/gabbi/tests/test_utils.py
+++ b/gabbi/tests/test_utils.py
@@ -45,6 +45,43 @@ class BinaryTypesTest(unittest.TestCase):
                              '%s should be binary' % media_type)
 
 
+class ParseContentTypeTest(unittest.TestCase):
+
+    def test_parse_simple(self):
+        self.assertEqual(
+            ('text/plain', 'latin-1'),
+            utils.parse_content_type('text/plain; charset=latin-1'))
+
+    def test_parse_extra(self):
+        self.assertEqual(
+            ('text/plain', 'latin-1'),
+            utils.parse_content_type(
+                'text/plain; charset=latin-1; version=1.2'))
+
+    def test_parse_default(self):
+        self.assertEqual(
+            ('text/plain', 'utf-8'),
+            utils.parse_content_type('text/plain'))
+
+    def test_parse_error_default(self):
+        self.assertEqual(
+            ('text/plain', 'utf-8'),
+            utils.parse_content_type(
+                'text/plain; face=ouch; charset=latin-1;'))
+
+    def test_parse_nocharset_default(self):
+        self.assertEqual(
+            ('text/plain', 'utf-8'),
+            utils.parse_content_type(
+                'text/plain; face=ouch'))
+
+    def test_parse_override_default(self):
+        self.assertEqual(
+            ('text/plain', 'latin-1'),
+            utils.parse_content_type(
+                'text/plain; face=ouch', default_charset='latin-1'))
+
+
 class ExtractContentTypeTest(unittest.TestCase):
 
     def test_extract_content_type_default_both(self):
diff --git a/gabbi/utils.py b/gabbi/utils.py
index 172b4bf..cc90732 100644
--- a/gabbi/utils.py
+++ b/gabbi/utils.py
@@ -68,26 +68,11 @@ def decode_response_content(header_dict, content):
         return content
 
 
-def extract_content_type(header_dict):
-    """Extract content-type from headers."""
+def extract_content_type(header_dict, default='application/binary'):
+    """Extract parsed content-type from headers."""
     content_type = header_dict.get('content-type',
-                                   'application/binary').strip().lower()
-    charset = 'utf-8'
-    if ';' in content_type:
-        content_type, parameter_strings = (attr.strip() for attr
-                                           in content_type.split(';', 1))
-        try:
-            parameter_pairs = [atom.strip().split('=')
-                               for atom in parameter_strings.split(';')]
-            parameters = {name: value for name, value in parameter_pairs}
-            charset = parameters['charset']
-        except (ValueError, KeyError):
-            # KeyError when no charset found.
-            # ValueError when the parameter_strings are poorly
-            # formed (for example trailing ;)
-            pass
-
-    return (content_type, charset)
+                                   default).strip().lower()
+    return parse_content_type(content_type)
 
 
 def get_colorizer(stream):
@@ -126,6 +111,26 @@ def not_binary(content_type):
             content_type.startswith('application/json'))
 
 
+def parse_content_type(content_type, default_charset='utf-8'):
+    """Parse content type value for media type and charset."""
+    charset = default_charset
+    if ';' in content_type:
+        content_type, parameter_strings = (attr.strip() for attr
+                                           in content_type.split(';', 1))
+        try:
+            parameter_pairs = [atom.strip().split('=')
+                               for atom in parameter_strings.split(';')]
+            parameters = {name: value for name, value in parameter_pairs}
+            charset = parameters['charset']
+        except (ValueError, KeyError):
+            # KeyError when no charset found.
+            # ValueError when the parameter_strings are poorly
+            # formed (for example trailing ;)
+            pass
+
+    return (content_type, charset)
+
+
 def host_info_from_target(target, prefix=None):
     """Turn url or host:port and target into test destination."""
     force_ssl = False