Provide some s3 helper methods for other middlewares to use.

get_s3_access_key_id returns the S3 access_key_id user for the request
is_s3_req checks whether a request looks like it ought to be an S3 request
parse_path returns the bucket and object names as a tuple of WSGI strings
extract_bucket_and_key extracts bucket and object from the request's PATH_INFO

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Shreeya Deshpande <shreeyad@nvidia.com>

Change-Id: Iaf86a07238cca6700dee736f55d4c0672cccf1b1
Signed-off-by: Shreeya Deshpande <shreeyad@nvidia.com>
This commit is contained in:
Yan Xiao
2025-01-30 15:55:44 -05:00
committed by Shreeya Deshpande
parent 48a5d5e42f
commit 9d7e7e27a5
5 changed files with 327 additions and 45 deletions

View File

@@ -26,6 +26,18 @@ class ACLError(S3Exception):
pass
class InvalidBucketNameParseError(S3Exception):
    """
    Signals that a bucket name taken from a request failed validation.

    :param bucket: the rejected bucket name; kept on ``bucket_name``
    """

    def __init__(self, bucket):
        self.bucket_name = bucket
class InvalidURIParseError(S3Exception):
    """
    Signals that a request URI could not be parsed.

    :param uri: the rejected URI; kept on ``uri``
    """

    def __init__(self, uri):
        self.uri = uri
class InvalidSubresource(S3Exception):
def __init__(self, resource, cause):
self.resource = resource

View File

@@ -39,7 +39,6 @@ from swift.common.http import HTTP_OK, HTTP_CREATED, HTTP_ACCEPTED, \
HTTP_TOO_MANY_REQUESTS, HTTP_RATE_LIMITED, is_success, \
HTTP_CLIENT_CLOSED_REQUEST
from swift.common.constraints import check_utf8
from swift.proxy.controllers.base import get_container_info
from swift.common.request_helpers import check_path_header
@@ -70,7 +69,9 @@ from swift.common.middleware.s3api.utils import utf8encode, \
S3Timestamp, mktime, MULTIUPLOAD_SUFFIX
from swift.common.middleware.s3api.subresource import decode_acl, encode_acl
from swift.common.middleware.s3api.utils import sysmeta_header, \
validate_bucket_name, Config
parse_host, parse_path, Config
from swift.common.middleware.s3api.exception import \
InvalidBucketNameParseError, InvalidURIParseError
from swift.common.middleware.s3api.acl_utils import handle_acl_header
@@ -171,6 +172,15 @@ def _header_acl_property(resource):
doc='Get and set the %s acl property' % resource)
def _parse_path(req, bucket_in_host, dns_compliant_bucket_names):
    """
    Call ``parse_path`` and re-raise its parse errors as the
    ``InvalidBucketName`` / ``InvalidURI`` errors used in this module.

    :param req: the request to parse
    :param bucket_in_host: bucket name taken from the host, if any
    :param dns_compliant_bucket_names: passed through to ``parse_path``
    :returns: whatever ``parse_path`` returns
    :raises InvalidBucketName: if ``parse_path`` raised
                               InvalidBucketNameParseError
    :raises InvalidURI: if ``parse_path`` raised InvalidURIParseError
    """
    try:
        parsed = parse_path(req, bucket_in_host, dns_compliant_bucket_names)
    except InvalidBucketNameParseError as exc:
        raise InvalidBucketName(exc.bucket_name)
    except InvalidURIParseError as exc:
        raise InvalidURI(exc.uri)
    return parsed
class HashingInput(InputProxy):
"""
wsgi.input wrapper to verify the SHA256 of the input as it's read.
@@ -1055,8 +1065,10 @@ class S3Request(swob.Request):
self.location = self.conf.location
self._timestamp = None
self.access_key, self.signature = self._parse_auth_info()
self.bucket_in_host = self._parse_host()
self.container_name, self.object_name = self._parse_uri()
self.bucket_in_host = parse_host(self.environ,
self.conf.storage_domains)
self.container_name, self.object_name = _parse_path(
self, self.bucket_in_host, self.conf.dns_compliant_bucket_names)
self._validate_headers()
if isinstance(self, SigV4Mixin):
# this is a deliberate but only partial shift away from the
@@ -1214,46 +1226,6 @@ class S3Request(swob.Request):
def _is_x_amz_content_sha256_required(self):
return False
def _parse_host(self):
if not self.conf.storage_domains:
return None
if 'HTTP_HOST' in self.environ:
given_domain = self.environ['HTTP_HOST']
elif 'SERVER_NAME' in self.environ:
given_domain = self.environ['SERVER_NAME']
else:
return None
port = ''
if ':' in given_domain:
given_domain, port = given_domain.rsplit(':', 1)
for storage_domain in self.conf.storage_domains:
if not storage_domain.startswith('.'):
storage_domain = '.' + storage_domain
if given_domain.endswith(storage_domain):
return given_domain[:-len(storage_domain)]
return None
def _parse_uri(self):
    """
    Split the request into its bucket and object names.

    :returns: a (bucket, object) tuple of WSGI strings
    :raises InvalidURI: if PATH_INFO fails the UTF-8 check
    :raises InvalidBucketName: if a bucket taken from the path fails
                               validation
    """
    path_info = self.environ['PATH_INFO']
    if not check_utf8(swob.wsgi_to_str(path_info)):
        raise InvalidURI(self.path)

    if self.bucket_in_host:
        # The bucket came from the host, so the whole path is the object.
        return self.bucket_in_host, (path_info[1:] or None)

    bucket, obj = self.split_path(0, 2, True)
    # No bucket at all is the GET service case; nothing to validate.
    if bucket:
        name_ok = validate_bucket_name(
            bucket, self.conf.dns_compliant_bucket_names)
        if not name_ok:
            raise InvalidBucketName(bucket)
    return bucket, obj
def _parse_query_authentication(self):
"""
Parse v2 authentication query args

View File

@@ -22,6 +22,10 @@ import time
import uuid
from swift.common import utils
from swift.common.constraints import check_utf8
from swift.common.swob import wsgi_to_str
from swift.common.middleware.s3api.exception import \
InvalidBucketNameParseError, InvalidURIParseError
MULTIUPLOAD_SUFFIX = '+segments'
@@ -105,6 +109,128 @@ def validate_bucket_name(name, dns_compliant_bucket_names):
return True
def get_s3_access_key_id(req):
    """
    Return the S3 access_key_id for the request, or None if it does not
    look like an S3 request.

    The ``Authorization`` header is examined first (v2 ``AWS ...`` then v4
    ``AWS4-HMAC-SHA256 ...``). Query parameters are only consulted when the
    header uses neither scheme.

    :param req: a swob.Request instance
    :returns: access_key_id if available, else None. An empty credential
              (for example a v4 header with no ``Credential=`` field) is
              reported as None rather than as an empty string.
    """
    authorization = req.headers.get('Authorization', '')
    if authorization.startswith('AWS '):
        # v2 header: "AWS <access_key_id>:<signature>"
        access_key_id = authorization[4:].rsplit(':', 1)[0]
    elif authorization.startswith('AWS4-HMAC-SHA256 '):
        # v4 header: "... Credential=<access_key_id>/<scope>, ..."
        credential = authorization.partition('Credential=')[2]
        access_key_id = credential.split('/', 1)[0]
    else:
        params = req.params
        if 'AWSAccessKeyId' in params:
            # v2 query auth
            access_key_id = params['AWSAccessKeyId']
        elif 'X-Amz-Credential' in params:
            # v4 query auth
            access_key_id = params['X-Amz-Credential'].split('/', 1)[0]
        else:
            access_key_id = None
    # Keep the documented contract: "not available" is always None.
    return access_key_id or None
def is_s3_req(req):
    """
    Tell whether a request looks like it is meant to be an S3 request.

    :param req: a swob.Request instance
    :returns: True if get_s3_access_key_id finds a non-empty access key id
              for the request, False otherwise
    """
    access_key_id = get_s3_access_key_id(req)
    return True if access_key_id else False
def parse_host(environ, storage_domains):
    """
    A bucket-in-host request has the bucket name as the first part of a
    ``.``-separated host. If the host ends with any of
    the given storage_domains then the bucket name is returned.
    Otherwise ``None`` is returned.

    The host is read from ``HTTP_HOST``, falling back to ``SERVER_NAME``;
    anything after the last ``:`` is dropped before matching.

    :param environ: an environment dict
    :param storage_domains: a list of storage domains for which bucket-in-host
                            is supported. An empty list or ``None`` means
                            bucket-in-host is not supported.
    :returns: bucket name or None
    """
    # Nothing configured (or not configured at all): bucket-in-host is off.
    if not storage_domains:
        return None

    if 'HTTP_HOST' in environ:
        given_domain = environ['HTTP_HOST']
    elif 'SERVER_NAME' in environ:
        given_domain = environ['SERVER_NAME']
    else:
        return None

    if ':' in given_domain:
        given_domain = given_domain.rsplit(':', 1)[0]

    for storage_domain in storage_domains:
        # Match on a label boundary: "localhost" is treated as ".localhost".
        if not storage_domain.startswith('.'):
            storage_domain = '.' + storage_domain

        if given_domain.endswith(storage_domain):
            return given_domain[:-len(storage_domain)]

    return None
def parse_path(req, bucket_in_host, dns_compliant_bucket_names):
    """
    Work out the bucket and object names for a request.

    When ``bucket_in_host`` is given it is used as the bucket and the whole
    path (minus its first character) is the object. Otherwise both are
    split out of the path and the bucket name is validated.

    :param req: a swob.Request instance
    :param bucket_in_host: the bucket name taken from the host of a
                           bucket-in-host request, or a false value
    :param dns_compliant_bucket_names: whether to validate that the bucket
                                       name must be dns compliant
    :returns: a (bucket, object) tuple of WSGI strings; the object is None
              when bucket_in_host is set and nothing follows the first
              character of the path
    :raises InvalidURIParseError: if PATH_INFO fails the UTF-8 check
    :raises InvalidBucketNameParseError: if a bucket taken from the path
                                         fails validation
    """
    path_info = req.environ['PATH_INFO']
    if not check_utf8(wsgi_to_str(path_info)):
        raise InvalidURIParseError(req.path)

    if bucket_in_host:
        return bucket_in_host, (path_info[1:] or None)

    bucket, obj = req.split_path(0, 2, True)
    # No bucket at all is the GET service case; nothing to validate.
    if bucket:
        name_ok = validate_bucket_name(bucket, dns_compliant_bucket_names)
        if not name_ok:
            raise InvalidBucketNameParseError(bucket)
    return bucket, obj
def extract_bucket_and_key(req, storage_domains,
                           dns_compliant_bucket_names):
    """
    Pull the bucket and object key out of a request.

    The bucket comes from the host when parse_host finds one for the given
    storage_domains; otherwise it is parsed from PATH_INFO.

    :param req: a swob.Request instance
    :param storage_domains: a list of storage domains for which bucket-in-host
                            is supported.
    :param dns_compliant_bucket_names: whether to validate that the bucket
                                       name must be dns compliant
    :returns: a tuple of (bucket, key). If parsing raises
              InvalidBucketNameParseError or InvalidURIParseError the tuple
              (None, None) is returned.
    """
    try:
        host_bucket = parse_host(req.environ, storage_domains)
        bucket, key = parse_path(
            req, host_bucket, dns_compliant_bucket_names)
    except (InvalidBucketNameParseError, InvalidURIParseError):
        return None, None
    return bucket, key
class S3Timestamp(utils.Timestamp):
S3_XML_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z"

View File

@@ -1385,7 +1385,7 @@ class TestS3ApiMiddleware(S3ApiTestCase):
# validation in the following test.
# NOTE: eventlet's PATH_INFO is unquoted
with patch('swift.common.middleware.s3api.s3request.'
with patch('swift.common.middleware.s3api.utils.'
'validate_bucket_name'):
verify('27ba31df5dbc6e063d8f87d62eb07143'
'f7f271c5330a917840586ac1c85b6f6b',

View File

@@ -17,7 +17,9 @@ import os
import time
import unittest
from swift.common.swob import Request
from swift.common.middleware.s3api import utils, s3request
from swift.common.middleware.s3api.exception import InvalidBucketNameParseError
strs = [
('Owner', 'owner'),
@@ -42,6 +44,7 @@ class TestS3ApiUtils(unittest.TestCase):
self.assertTrue(utils.validate_bucket_name('bucket-1', True))
self.assertTrue(utils.validate_bucket_name('b.u.c.k.e.t', True))
self.assertTrue(utils.validate_bucket_name('a' * 63, True))
self.assertTrue(utils.validate_bucket_name('v1.0', True))
# bad cases
self.assertFalse(utils.validate_bucket_name('a', True))
self.assertFalse(utils.validate_bucket_name('aa', True))
@@ -56,6 +59,7 @@ class TestS3ApiUtils(unittest.TestCase):
self.assertFalse(utils.validate_bucket_name('bucket-.bucket', True))
self.assertFalse(utils.validate_bucket_name('bucket..bucket', True))
self.assertFalse(utils.validate_bucket_name('a' * 64, True))
self.assertFalse(utils.validate_bucket_name('v1', False))
def test_validate_bucket_name_with_dns_compliant_bucket_names_false(self):
# good cases
@@ -81,6 +85,174 @@ class TestS3ApiUtils(unittest.TestCase):
self.assertFalse(utils.validate_bucket_name('bucket.', False))
self.assertFalse(utils.validate_bucket_name('a' * 256, False))
def test_extract_bucket_and_key(self):
    # Path-style request: bucket and key both come from the path.
    req = Request.blank(
        '/bucket/object',
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'AWS test:tester:hmac'})
    bucket, key = utils.extract_bucket_and_key(req, [], False)
    self.assertEqual(bucket, 'bucket')
    self.assertEqual(key, 'object')
def test_extract_bucket_and_key_invalid_character(self):
    # A NUL in the path is expected to yield (None, None).
    env = {'REQUEST_METHOD': 'GET'}
    hdrs = {'Authorization': 'AWS test:tester:hmac'}
    req = Request.blank('/bucket/\x00object', environ=env, headers=hdrs)
    result = utils.extract_bucket_and_key(req, [], False)
    self.assertEqual((None, None), result)
def test_extract_bucket_and_key_invalid_bucket(self):
    # The bucket name 'b' is expected to be rejected.
    env = {'REQUEST_METHOD': 'GET'}
    hdrs = {'Authorization': 'AWS test:tester:hmac'}
    req = Request.blank('/b/object', environ=env, headers=hdrs)
    result = utils.extract_bucket_and_key(req, [], False)
    self.assertEqual((None, None), result)
def test_extract_bucket_and_key_invalid_dns_compliant(self):
    env = {'REQUEST_METHOD': 'GET'}
    hdrs = {'Authorization': 'AWS test:tester:hmac'}
    req = Request.blank('/BUCKET/object', environ=env, headers=hdrs)

    # Accepted while DNS compliance is not enforced ...
    lenient = utils.extract_bucket_and_key(req, [], False)
    self.assertEqual(('BUCKET', 'object'), lenient)

    # ... and rejected once it is.
    strict = utils.extract_bucket_and_key(req, [], True)
    self.assertEqual((None, None), strict)
def test_extract_bucket_and_key_bucket_in_host(self):
    # With a matching storage domain the bucket comes from the host and
    # the whole path becomes the key.
    env = {'REQUEST_METHOD': 'GET', 'HTTP_HOST': 'bucket.localhost'}
    hdrs = {'Authorization': 'AWS test:atester:hmac'}
    req = Request.blank('/object/xyz', environ=env, headers=hdrs)
    result = utils.extract_bucket_and_key(req, ['localhost'], False)
    self.assertEqual(('bucket', 'object/xyz'), result)
def test_parse_host(self):
    # Only SERVER_NAME available: it is used as the host.
    req = Request.blank(
        '/bucket/object',
        environ={'REQUEST_METHOD': 'GET', 'SERVER_NAME': 'foo.boo'})
    del req.environ['HTTP_HOST']
    env = req.environ
    self.assertIsNone(utils.parse_host(env, []))
    self.assertEqual(utils.parse_host(env, ['boo']), 'foo')

    # Both present: HTTP_HOST wins over SERVER_NAME.
    req = Request.blank(
        '/bucket/object',
        environ={'REQUEST_METHOD': 'GET',
                 'HTTP_HOST': 'buckets.localhost',
                 'SERVER_NAME': 'foo.localhost'})
    env = req.environ
    self.assertIsNone(utils.parse_host(env, []))
    self.assertIsNone(utils.parse_host(env, ['notlocalhost']))
    self.assertEqual(utils.parse_host(env, ['localhost']), 'buckets')
    self.assertEqual(utils.parse_host(env, ['.localhost']), 'buckets')
    self.assertEqual(
        utils.parse_host(env, ['notlocalhost', '.localhost']), 'buckets')
def test_parse_path(self):
    req = Request.blank('/bucket/object',
                        environ={'REQUEST_METHOD': 'GET'})

    # Path-style, without and with DNS compliance enforced.
    bucket, obj = utils.parse_path(req, None, False)
    self.assertEqual(bucket, 'bucket')
    self.assertEqual(obj, 'object')

    bucket, obj = utils.parse_path(req, None, True)
    self.assertEqual(bucket, 'bucket')
    self.assertEqual(obj, 'object')

    # Bucket supplied by the host: the whole path is the object.
    bucket, obj = utils.parse_path(req, 'boo', True)
    self.assertEqual(bucket, 'boo')
    self.assertEqual(obj, 'bucket/object')
def test_parse_path_dns_compliant_bucket_names(self):
    req = Request.blank('/BUCKET/object',
                        environ={'REQUEST_METHOD': 'GET'})

    # A non-compliant bucket in the path is rejected ...
    with self.assertRaises(InvalidBucketNameParseError):
        utils.parse_path(req, None, True)

    # ... but non-compliant is ok if it somehow came in the host??
    bucket, obj = utils.parse_path(req, 'BUCKET', True)
    self.assertEqual(bucket, 'BUCKET')
    self.assertEqual(obj, 'BUCKET/object')
def test_get_s3_access_key_id_not_s3_req(self):
    # An Authorization header with an unrecognised scheme gives None.
    req = Request.blank(
        '/v1/a/',
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'not AWS my_access_key_id:signature'})
    access_key_id = utils.get_s3_access_key_id(req)
    self.assertIsNone(access_key_id)
def test_get_s3_access_key_id_v2_header(self):
    # v2 header auth: the key id is the part before the signature.
    req = Request.blank(
        '/v1/a/',
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': 'AWS my_access_key_id:signature'})
    access_key_id = utils.get_s3_access_key_id(req)
    self.assertEqual('my_access_key_id', access_key_id)
def test_get_s3_access_key_id_v2_param(self):
    # v2 query auth: the key id is the AWSAccessKeyId parameter.
    req = Request.blank(
        '/v1/a/',
        environ={'REQUEST_METHOD': 'GET'},
        params={'AWSAccessKeyId': 'my_access_key_id'})
    access_key_id = utils.get_s3_access_key_id(req)
    self.assertEqual('my_access_key_id', access_key_id)
def test_get_s3_access_key_id_v4_header(self):
    # v4 header auth: the key id is the first part of Credential=.
    authorization = (
        'AWS4-HMAC-SHA256 '
        'Credential=my_access_key_id/20130524/us-east-1/s3/'
        'aws4_request,'
        'SignedHeaders=host;range;x-amz-date,'
        'Signature=fe5f80f77d5fa3beca038a248ff027d0445342fe2855ddc963'
        '176630326f1024')
    req = Request.blank(
        '/v1/a/',
        environ={'REQUEST_METHOD': 'GET'},
        headers={'Authorization': authorization})
    access_key_id = utils.get_s3_access_key_id(req)
    self.assertEqual('my_access_key_id', access_key_id)
def test_get_s3_access_key_id_v4_param(self):
    # v4 query auth: the key id is the first part of X-Amz-Credential.
    credential = 'my_access_key_id/20130721/us-east-1/s3/aws4_request'
    req = Request.blank(
        '/v1/a/',
        environ={'REQUEST_METHOD': 'GET'},
        params={'X-Amz-Credential': credential})
    access_key_id = utils.get_s3_access_key_id(req)
    self.assertEqual('my_access_key_id', access_key_id)
def test_is_s3_req(self):
    env = {'REQUEST_METHOD': 'GET'}

    # An unrecognised Authorization scheme is not an S3 request.
    not_s3 = Request.blank(
        '/v1/a/',
        environ=env,
        headers={'Authorization': 'not AWS my_access_key_id:signature'})
    self.assertIs(False, utils.is_s3_req(not_s3))

    # A v2 Authorization header is.
    s3 = Request.blank(
        '/v1/a/',
        environ=env,
        headers={'Authorization': 'AWS my_access_key_id:signature'})
    self.assertIs(True, utils.is_s3_req(s3))
def test_mktime(self):
date_headers = [
'Thu, 01 Jan 1970 00:00:00 -0000',