diff --git a/swift/common/middleware/crypto/crypto_utils.py b/swift/common/middleware/crypto/crypto_utils.py index 5cd2740bf9..980bebb912 100644 --- a/swift/common/middleware/crypto/crypto_utils.py +++ b/swift/common/middleware/crypto/crypto_utils.py @@ -23,9 +23,8 @@ import urllib.parse from swift.common.exceptions import EncryptionException, UnknownSecretIdError from swift.common.swob import HTTPInternalServerError -from swift.common.utils import get_logger +from swift.common.utils import get_logger, parse_header from swift.common.wsgi import WSGIContext -from cgi import parse_header CRYPTO_KEY_CALLBACK = 'swift.callback.fetch_crypto_keys' diff --git a/swift/common/middleware/s3api/s3api.py b/swift/common/middleware/s3api/s3api.py index cb498de3c3..dfef93a0d3 100644 --- a/swift/common/middleware/s3api/s3api.py +++ b/swift/common/middleware/s3api/s3api.py @@ -141,7 +141,6 @@ https://github.com/swiftstack/s3compat in detail. """ -from cgi import parse_header import json from paste.deploy import loadwsgi from urllib.parse import parse_qs @@ -159,7 +158,8 @@ from swift.common.middleware.s3api.s3request import get_request_class from swift.common.middleware.s3api.s3response import ErrorResponse, \ InternalError, MethodNotAllowed, S3ResponseBase, S3NotImplemented from swift.common.utils import get_logger, config_true_value, \ - config_positive_int_value, split_path, closing_if_possible, list_from_csv + config_positive_int_value, split_path, closing_if_possible, \ + list_from_csv, parse_header from swift.common.middleware.s3api.utils import Config from swift.common.middleware.s3api.acl_handlers import get_acl_handler from swift.common.registry import register_swift_info, \ diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index 833b5a1bde..6c7389ac2c 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -339,7 +339,6 @@ metadata which can be used for stats and billing purposes. 
""" import base64 -from cgi import parse_header from collections import defaultdict from datetime import datetime import json @@ -362,7 +361,7 @@ from swift.common.utils import get_logger, config_true_value, \ override_bytes_from_content_type, split_path, \ RateLimitedIterator, quote, closing_if_possible, \ LRUCache, StreamingPile, strict_b64decode, Timestamp, friendly_close, \ - md5 + md5, parse_header from swift.common.registry import register_swift_info from swift.common.request_helpers import SegmentedIterable, \ get_sys_meta_prefix, update_etag_is_at_header, resolve_etag_is_at_header, \ diff --git a/swift/common/middleware/symlink.py b/swift/common/middleware/symlink.py index d951577d91..8f8d6e4e53 100644 --- a/swift/common/middleware/symlink.py +++ b/swift/common/middleware/symlink.py @@ -199,11 +199,10 @@ configuration steps are required: import json import os -from cgi import parse_header from swift.common.utils import get_logger, split_path, \ MD5_OF_EMPTY_STRING, close_if_possible, closing_if_possible, \ - config_true_value, drain_and_close + config_true_value, drain_and_close, parse_header from swift.common.registry import register_swift_info from swift.common.constraints import check_account_format from swift.common.wsgi import WSGIContext, make_subrequest, \ @@ -289,7 +288,7 @@ def _validate_and_prep_request_headers(req): request=req, content_type='text/plain') etag = normalize_etag(req.headers.get(TGT_ETAG_SYMLINK_HDR, None)) if etag and any(c in etag for c in ';"\\'): - # See cgi.parse_header for why the above chars are problematic + # See utils.parse_header for why the above chars are problematic raise HTTPBadRequest( body='Bad %s format' % TGT_ETAG_SYMLINK_HDR.title(), request=req, content_type='text/plain') diff --git a/swift/common/middleware/versioned_writes/object_versioning.py b/swift/common/middleware/versioned_writes/object_versioning.py index 4083fc8ce6..2a29d25ebc 100644 --- a/swift/common/middleware/versioned_writes/object_versioning.py 
+++ b/swift/common/middleware/versioned_writes/object_versioning.py @@ -147,7 +147,6 @@ import itertools import json import time -from cgi import parse_header from urllib.parse import unquote from swift.common.constraints import MAX_FILE_SIZE, valid_api_version, \ @@ -169,7 +168,7 @@ from swift.common.swob import HTTPPreconditionFailed, HTTPServiceUnavailable, \ from swift.common.storage_policy import POLICIES from swift.common.utils import get_logger, Timestamp, drain_and_close, \ config_true_value, close_if_possible, closing_if_possible, \ - FileLikeIter, split_path, parse_content_type, RESERVED_STR + FileLikeIter, split_path, parse_content_type, parse_header, RESERVED_STR from swift.common.wsgi import WSGIContext, make_pre_authed_request from swift.proxy.controllers.base import get_container_info diff --git a/swift/common/utils/__init__.py b/swift/common/utils/__init__.py index dacfcb080a..d5518d5a05 100644 --- a/swift/common/utils/__init__.py +++ b/swift/common/utils/__init__.py @@ -2840,6 +2840,11 @@ _rfc_extension_pattern = re.compile( r'(?:\s*;\s*(' + _rfc_token + r")\s*(?:=\s*(" + _rfc_token + r'|"(?:[^"\\]|\\.)*"))?)') +_loose_token = r'[^()<>@,;:\"\[\]?={}\x00-\x20\x7f]+' # nosec B105 +_loose_extension_pattern = re.compile( + r'(?:\s*;\s*(' + _loose_token + r")\s*(?:=\s*(" + _loose_token + + r'|"(?:[^"\\]|\\.)*"))?)') + _content_range_pattern = re.compile(r'^bytes (\d+)-(\d+)/(\d+)$') @@ -2861,7 +2866,7 @@ def parse_content_range(content_range): return tuple(int(x) for x in found.groups()) -def parse_content_type(content_type): +def parse_content_type(content_type, strict=True): """ Parse a content-type and its parameters into values. RFC 2616 sec 14.17 and 3.7 are pertinent. @@ -2873,19 +2878,48 @@ def parse_content_type(content_type): ('text/plain', [('charset, 'UTF-8'), ('level', '1')]) :param content_type: content_type to parse + :param strict: ignore ``/`` and any following characters in parameter + tokens. 
+ If ``strict`` is True a parameter such as ``x=a/b`` will be + parsed as ``x=a``. If ``strict`` is False a parameter such as ``x=a/b`` + will be parsed as ``x=a/b``. The default is True. :returns: a tuple containing (content type, list of k, v parameter tuples) """ parm_list = [] if ';' in content_type: content_type, parms = content_type.split(';', 1) parms = ';' + parms - for m in _rfc_extension_pattern.findall(parms): + pat = _rfc_extension_pattern if strict else _loose_extension_pattern + for m in pat.findall(parms): key = m[0].strip() value = m[1].strip() parm_list.append((key, value)) return content_type, parm_list +def parse_header(value): + """ + Parse a header value to extract the first part and a dict of any + following parameters. + + The ``value`` to parse should be of the form: + + ``<token>[;<key>=<value>][; <key>=<value>]...`` + + ``<token>`` should be of the form ``<token>[/<token>]``, ``<key>`` + should be a ``token``, and ``<value>`` should be either a ``token`` or + ``quoted-string``, where ``token`` and ``quoted-string`` are defined by RFC + 2616 section 2.2. + + :param value: the header value to parse. + :return: a tuple (first part, dict(params)). + """ + # note: this does not behave *exactly* like cgi.parse_header (which this + # replaces) w.r.t. parsing non-token characters in param values (e.g. the + # null character), but it's sufficient for our use cases. 
+ token, params = parse_content_type(value, strict=False) + return token, dict(params) + + def extract_swift_bytes(content_type): """ Parse a content-type and return a tuple containing: diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 7a77677651..a1457a818b 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -1853,6 +1853,77 @@ cluster_dfw1 = http://dfw1.host/v1/ self.assertEqual( utils.parse_content_type(r'text/plain; x="\""; a'), ('text/plain', [('x', r'"\""'), ('a', '')])) + self.assertEqual( + utils.parse_content_type(r'text/plain; x=a/b; y'), + ('text/plain', [('x', 'a'), ('y', '')])) + + self.assertEqual( + utils.parse_content_type(r'text/plain; x=a/b; y', strict=True), + ('text/plain', [('x', 'a'), ('y', '')])) + self.assertEqual( + utils.parse_content_type(r'text/plain; x=a/b; y', strict=False), + ('text/plain', [('x', 'a/b'), ('y', '')])) + + def test_parse_header(self): + self.assertEqual( + utils.parse_header('text/plain'), ('text/plain', {})) + self.assertEqual( + utils.parse_header('text/plain;'), ('text/plain', {})) + self.assertEqual( + utils.parse_header(r'text/plain; x=a/b; y = z'), + ('text/plain', {'x': 'a/b', 'y': 'z'})) + self.assertEqual( + utils.parse_header(r'text/plain; x=a/b; y'), + ('text/plain', {'x': 'a/b', 'y': ''})) + self.assertEqual( + utils.parse_header('etag; x=a/b; y'), + ('etag', {'x': 'a/b', 'y': ''})) + + def test_parse_headers_chars_in_params(self): + def do_test(val): + self.assertEqual( + utils.parse_header('text/plain; x=a%sb' % val), + ('text/plain', {'x': 'a%sb' % val})) + + do_test('\N{SNOWMAN}') + do_test('\\') + do_test('%') + do_test('-') + do_test('-') + do_test('&') + # wsgi_quote'd null character is ok... + do_test('%00') + + def test_parse_header_non_token_chars_in_params(self): + def do_test(val): + # character terminates individual param parsing... 
+ self.assertEqual( + utils.parse_header('text/plain; x=a%sb; y=z' % val), + ('text/plain', {'x': 'a', 'y': 'z'}), + 'val=%s' % val + ) + + non_token_chars = '()<>@,:[]?={}\x00"' + + for ch in non_token_chars: + do_test(ch) + + do_test(' space oddity ') + + def test_parse_header_quoted_string_in_params(self): + def do_test(val): + self.assertEqual( + utils.parse_header('text/plain; x="%s"; y=z' % val), + ('text/plain', {'x': '"%s"' % val, 'y': 'z'}), + 'val=%s' % val + ) + + non_token_chars = '()<>@,:[]?={}\x00' + + for ch in non_token_chars: + do_test(ch) + + do_test(' space oddity ') def test_override_bytes_from_content_type(self): listing_dict = {